mirror of https://github.com/blakeblackshear/frigate.git
synced 2026-02-20 13:54:36 +01:00
Full streaming support
@@ -25,6 +25,7 @@ from frigate.api.defs.response.chat_response import (
 )
 from frigate.api.defs.tags import Tags
 from frigate.api.event import events
+from frigate.genai.utils import build_assistant_message_for_conversation
 
 logger = logging.getLogger(__name__)
 
@@ -403,6 +404,78 @@ async def _execute_tool_internal(
     return {"error": f"Unknown tool: {tool_name}"}
 
 
+async def _execute_pending_tools(
+    pending_tool_calls: List[Dict[str, Any]],
+    request: Request,
+    allowed_cameras: List[str],
+) -> tuple[List[ToolCall], List[Dict[str, Any]]]:
+    """
+    Execute a list of tool calls; return (ToolCall list for API response, tool result dicts for conversation).
+    """
+    tool_calls_out: List[ToolCall] = []
+    tool_results: List[Dict[str, Any]] = []
+    for tool_call in pending_tool_calls:
+        tool_name = tool_call["name"]
+        tool_args = tool_call.get("arguments") or {}
+        tool_call_id = tool_call["id"]
+        logger.debug(
+            f"Executing tool: {tool_name} (id: {tool_call_id}) with arguments: {json.dumps(tool_args, indent=2)}"
+        )
+        try:
+            tool_result = await _execute_tool_internal(
+                tool_name, tool_args, request, allowed_cameras
+            )
+            if tool_name == "search_objects" and isinstance(tool_result, list):
+                tool_result = _format_events_with_local_time(tool_result)
+                _keys = {
+                    "id",
+                    "camera",
+                    "label",
+                    "zones",
+                    "start_time_local",
+                    "end_time_local",
+                    "sub_label",
+                    "event_count",
+                }
+                tool_result = [
+                    {k: evt[k] for k in _keys if k in evt}
+                    for evt in tool_result
+                    if isinstance(evt, dict)
+                ]
+            result_content = (
+                json.dumps(tool_result)
+                if isinstance(tool_result, (dict, list))
+                else (tool_result if isinstance(tool_result, str) else str(tool_result))
+            )
+            tool_calls_out.append(
+                ToolCall(name=tool_name, arguments=tool_args, response=result_content)
+            )
+            tool_results.append(
+                {
+                    "role": "tool",
+                    "tool_call_id": tool_call_id,
+                    "content": result_content,
+                }
+            )
+        except Exception as e:
+            logger.error(
+                f"Error executing tool {tool_name} (id: {tool_call_id}): {e}",
+                exc_info=True,
+            )
+            error_content = json.dumps({"error": f"Tool execution failed: {str(e)}"})
+            tool_calls_out.append(
+                ToolCall(name=tool_name, arguments=tool_args, response=error_content)
+            )
+            tool_results.append(
+                {
+                    "role": "tool",
+                    "tool_call_id": tool_call_id,
+                    "content": error_content,
+                }
+            )
+    return (tool_calls_out, tool_results)
+
+
 @router.post(
     "/chat/completion",
     dependencies=[Depends(allow_any_authenticated())],
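The _keys whitelist in the helper above keeps tool output compact: raw search_objects events carry bulky fields that would waste context, and per the original comment the local-time fields exist so the LLM doesn't hallucinate timestamps. A small illustration of the filter (the event dict is invented; sorted() is only to make the printout deterministic):

_keys = {
    "id", "camera", "label", "zones",
    "start_time_local", "end_time_local", "sub_label", "event_count",
}
evt = {
    "id": "1700000000.123456-abcdef",      # invented event
    "camera": "front",
    "label": "person",
    "start_time_local": "2026-02-20 13:00:00",
    "thumbnail": "<large base64 blob>",    # dropped by the filter
    "data": {"score": 0.92},               # dropped by the filter
}
print({k: evt[k] for k in sorted(_keys) if k in evt})
# {'camera': 'front', 'id': '1700000000.123456-abcdef',
#  'label': 'person', 'start_time_local': '2026-02-20 13:00:00'}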
@@ -527,6 +600,81 @@ Always be accurate with time calculations based on the current date provided.{ca
         f"{len(tools)} tool(s) available, max_iterations={max_iterations}"
     )
 
+    # True LLM streaming when client supports it and stream requested
+    if body.stream and hasattr(genai_client, "chat_with_tools_stream"):
+        stream_tool_calls: List[ToolCall] = []
+        stream_iterations = 0
+
+        async def stream_body_llm():
+            nonlocal conversation, stream_tool_calls, stream_iterations
+            while stream_iterations < max_iterations:
+                logger.debug(
+                    f"Streaming LLM (iteration {stream_iterations + 1}/{max_iterations}) "
+                    f"with {len(conversation)} message(s)"
+                )
+                async for event in genai_client.chat_with_tools_stream(
+                    messages=conversation,
+                    tools=tools if tools else None,
+                    tool_choice="auto",
+                ):
+                    kind, value = event
+                    if kind == "content_delta":
+                        yield (
+                            json.dumps({"type": "content", "delta": value}).encode(
+                                "utf-8"
+                            )
+                            + b"\n"
+                        )
+                    elif kind == "message":
+                        msg = value
+                        if msg.get("finish_reason") == "error":
+                            yield (
+                                json.dumps(
+                                    {
+                                        "type": "error",
+                                        "error": "An error occurred while processing your request.",
+                                    }
+                                ).encode("utf-8")
+                                + b"\n"
+                            )
+                            return
+                        pending = msg.get("tool_calls")
+                        if pending:
+                            stream_iterations += 1
+                            conversation.append(
+                                build_assistant_message_for_conversation(
+                                    msg.get("content"), pending
+                                )
+                            )
+                            executed_calls, tool_results = await _execute_pending_tools(
+                                pending, request, allowed_cameras
+                            )
+                            stream_tool_calls.extend(executed_calls)
+                            conversation.extend(tool_results)
+                            yield (
+                                json.dumps(
+                                    {
+                                        "type": "tool_calls",
+                                        "tool_calls": [
+                                            tc.model_dump() for tc in stream_tool_calls
+                                        ],
+                                    }
+                                ).encode("utf-8")
+                                + b"\n"
+                            )
+                            break
+                        else:
+                            yield (json.dumps({"type": "done"}).encode("utf-8") + b"\n")
+                            return
+            else:
+                yield json.dumps({"type": "done"}).encode("utf-8") + b"\n"
+
+        return StreamingResponse(
+            stream_body_llm(),
+            media_type="application/x-ndjson",
+            headers={"X-Accel-Buffering": "no"},
+        )
+
     try:
         while tool_iterations < max_iterations:
             logger.debug(
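The endpoint above streams newline-delimited JSON: each line is one event of type "content", "tool_calls", "error", or "done". A minimal consumer sketch — host, auth, and the request body fields are assumptions; only the event types and the application/x-ndjson framing come from the diff:

import asyncio
import json

import httpx


async def consume_chat_stream(prompt: str) -> None:
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream(
            "POST",
            "http://localhost:5000/api/chat/completion",  # hypothetical host
            json={"message": prompt, "stream": True},     # assumed body fields
        ) as response:
            async for line in response.aiter_lines():
                if not line.strip():
                    continue
                event = json.loads(line)
                if event["type"] == "content":
                    print(event["delta"], end="", flush=True)
                elif event["type"] == "tool_calls":
                    names = [tc["name"] for tc in event["tool_calls"]]
                    print(f"\n[tools executed: {names}]")
                elif event["type"] in ("error", "done"):
                    break


asyncio.run(consume_chat_stream("What happened on the front camera today?"))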
@@ -548,23 +696,11 @@ Always be accurate with time calculations based on the current date provided.{ca
                    status_code=500,
                )
 
-            assistant_message = {
-                "role": "assistant",
-                "content": response.get("content"),
-            }
-            if response.get("tool_calls"):
-                assistant_message["tool_calls"] = [
-                    {
-                        "id": tc["id"],
-                        "type": "function",
-                        "function": {
-                            "name": tc["name"],
-                            "arguments": json.dumps(tc["arguments"]),
-                        },
-                    }
-                    for tc in response["tool_calls"]
-                ]
-            conversation.append(assistant_message)
+            conversation.append(
+                build_assistant_message_for_conversation(
+                    response.get("content"), response.get("tool_calls")
+                )
+            )
 
             pending_tool_calls = response.get("tool_calls")
             if not pending_tool_calls:
@@ -574,6 +710,7 @@ Always be accurate with time calculations based on the current date provided.{ca
             final_content = response.get("content") or ""
 
             if body.stream:
+
                 async def stream_body() -> Any:
                     if tool_calls:
                         yield (
@@ -590,8 +727,9 @@ Always be accurate with time calculations based on the current date provided.{ca
                     # Stream content in word-sized chunks for smooth UX
                     for part in _chunk_content(final_content):
                         yield (
-                            json.dumps({"type": "content", "delta": part})
-                            .encode("utf-8")
+                            json.dumps({"type": "content", "delta": part}).encode(
+                                "utf-8"
+                            )
                             + b"\n"
                         )
                     yield json.dumps({"type": "done"}).encode("utf-8") + b"\n"
@@ -614,123 +752,15 @@ Always be accurate with time calculations based on the current date provided.{ca
                    ).model_dump(),
                )
 
-            # Execute tools
             tool_iterations += 1
             logger.debug(
                 f"Tool calls detected (iteration {tool_iterations}/{max_iterations}): "
                 f"{len(pending_tool_calls)} tool(s) to execute"
             )
-            tool_results = []
-
-            for tool_call in pending_tool_calls:
-                tool_name = tool_call["name"]
-                tool_args = tool_call["arguments"]
-                tool_call_id = tool_call["id"]
-
-                logger.debug(
-                    f"Executing tool: {tool_name} (id: {tool_call_id}) with arguments: {json.dumps(tool_args, indent=2)}"
-                )
-
-                try:
-                    tool_result = await _execute_tool_internal(
-                        tool_name, tool_args, request, allowed_cameras
-                    )
-
-                    # Add local time fields to search_objects results so the LLM doesn't hallucinate timestamps
-                    if tool_name == "search_objects" and isinstance(tool_result, list):
-                        tool_result = _format_events_with_local_time(tool_result)
-                        _keys = {
-                            "id",
-                            "camera",
-                            "label",
-                            "zones",
-                            "start_time_local",
-                            "end_time_local",
-                            "sub_label",
-                            "event_count",
-                        }
-                        tool_result = [
-                            {k: evt[k] for k in _keys if k in evt}
-                            for evt in tool_result
-                            if isinstance(evt, dict)
-                        ]
-
-                    if isinstance(tool_result, dict):
-                        result_content = json.dumps(tool_result)
-                        result_summary = tool_result
-                        if isinstance(tool_result, dict) and isinstance(
-                            tool_result.get("content"), list
-                        ):
-                            result_count = len(tool_result.get("content", []))
-                            result_summary = {
-                                "count": result_count,
-                                "sample": tool_result.get("content", [])[:2]
-                                if result_count > 0
-                                else [],
-                            }
-                        logger.debug(
-                            f"Tool {tool_name} (id: {tool_call_id}) completed successfully. "
-                            f"Result: {json.dumps(result_summary, indent=2)}"
-                        )
-                    elif isinstance(tool_result, list):
-                        result_content = json.dumps(tool_result)
-                        logger.debug(
-                            f"Tool {tool_name} (id: {tool_call_id}) completed successfully. "
-                            f"Result: {len(tool_result)} item(s)"
-                        )
-                    elif isinstance(tool_result, str):
-                        result_content = tool_result
-                        logger.debug(
-                            f"Tool {tool_name} (id: {tool_call_id}) completed successfully. "
-                            f"Result length: {len(result_content)} characters"
-                        )
-                    else:
-                        result_content = str(tool_result)
-                        logger.debug(
-                            f"Tool {tool_name} (id: {tool_call_id}) completed successfully. "
-                            f"Result type: {type(tool_result).__name__}"
-                        )
-
-                    tool_calls.append(
-                        ToolCall(
-                            name=tool_name,
-                            arguments=tool_args or {},
-                            response=result_content,
-                        )
-                    )
-                    tool_results.append(
-                        {
-                            "role": "tool",
-                            "tool_call_id": tool_call_id,
-                            "content": result_content,
-                        }
-                    )
-                except Exception as e:
-                    logger.error(
-                        f"Error executing tool {tool_name} (id: {tool_call_id}): {e}",
-                        exc_info=True,
-                    )
-                    error_content = json.dumps(
-                        {"error": f"Tool execution failed: {str(e)}"}
-                    )
-                    tool_calls.append(
-                        ToolCall(
-                            name=tool_name,
-                            arguments=tool_args or {},
-                            response=error_content,
-                        )
-                    )
-                    tool_results.append(
-                        {
-                            "role": "tool",
-                            "tool_call_id": tool_call_id,
-                            "content": error_content,
-                        }
-                    )
-                    logger.debug(
-                        f"Tool {tool_name} (id: {tool_call_id}) failed. Error result added to conversation."
-                    )
-
+            executed_calls, tool_results = await _execute_pending_tools(
+                pending_tool_calls, request, allowed_cameras
+            )
+            tool_calls.extend(executed_calls)
             conversation.extend(tool_results)
             logger.debug(
                 f"Added {len(tool_results)} tool result(s) to conversation. "
@@ -5,10 +5,12 @@ import json
 import logging
 from typing import Any, Optional
 
+import httpx
 import requests
 
 from frigate.config import GenAIProviderEnum
 from frigate.genai import GenAIClient, register_genai_provider
+from frigate.genai.utils import parse_tool_calls_from_message
 
 logger = logging.getLogger(__name__)
 
@@ -99,7 +101,76 @@ class LlamaCppClient(GenAIClient):
 
     def get_context_size(self) -> int:
         """Get the context window size for llama.cpp."""
-        return self.genai_config.provider_options.get("context_size", 4096)
+        return self.provider_options.get("context_size", 4096)
+
+    def _build_payload(
+        self,
+        messages: list[dict[str, Any]],
+        tools: Optional[list[dict[str, Any]]],
+        tool_choice: Optional[str],
+        stream: bool = False,
+    ) -> dict[str, Any]:
+        """Build request payload for chat completions (sync or stream)."""
+        openai_tool_choice = None
+        if tool_choice:
+            if tool_choice == "none":
+                openai_tool_choice = "none"
+            elif tool_choice == "auto":
+                openai_tool_choice = "auto"
+            elif tool_choice == "required":
+                openai_tool_choice = "required"
+
+        payload: dict[str, Any] = {"messages": messages}
+        if stream:
+            payload["stream"] = True
+        if tools:
+            payload["tools"] = tools
+        if openai_tool_choice is not None:
+            payload["tool_choice"] = openai_tool_choice
+        provider_opts = {
+            k: v for k, v in self.provider_options.items() if k != "context_size"
+        }
+        payload.update(provider_opts)
+        return payload
+
+    def _message_from_choice(self, choice: dict[str, Any]) -> dict[str, Any]:
+        """Parse OpenAI-style choice into {content, tool_calls, finish_reason}."""
+        message = choice.get("message", {})
+        content = message.get("content")
+        content = content.strip() if content else None
+        tool_calls = parse_tool_calls_from_message(message)
+        finish_reason = choice.get("finish_reason") or (
+            "tool_calls" if tool_calls else "stop" if content else "error"
+        )
+        return {
+            "content": content,
+            "tool_calls": tool_calls,
+            "finish_reason": finish_reason,
+        }
+
+    @staticmethod
+    def _streamed_tool_calls_to_list(
+        tool_calls_by_index: dict[int, dict[str, Any]],
+    ) -> Optional[list[dict[str, Any]]]:
+        """Convert streamed tool_calls index map to list of {id, name, arguments}."""
+        if not tool_calls_by_index:
+            return None
+        result = []
+        for idx in sorted(tool_calls_by_index.keys()):
+            t = tool_calls_by_index[idx]
+            args_str = t.get("arguments") or "{}"
+            try:
+                arguments = json.loads(args_str)
+            except json.JSONDecodeError:
+                arguments = {}
+            result.append(
+                {
+                    "id": t.get("id", ""),
+                    "name": t.get("name", ""),
+                    "arguments": arguments,
+                }
+            )
+        return result if result else None
 
     def chat_with_tools(
         self,
@@ -122,31 +193,8 @@ class LlamaCppClient(GenAIClient):
                "tool_calls": None,
                "finish_reason": "error",
            }
 
        try:
-            openai_tool_choice = None
-            if tool_choice:
-                if tool_choice == "none":
-                    openai_tool_choice = "none"
-                elif tool_choice == "auto":
-                    openai_tool_choice = "auto"
-                elif tool_choice == "required":
-                    openai_tool_choice = "required"
-
-            payload = {
-                "messages": messages,
-            }
-
-            if tools:
-                payload["tools"] = tools
-            if openai_tool_choice is not None:
-                payload["tool_choice"] = openai_tool_choice
-
-            provider_opts = {
-                k: v for k, v in self.provider_options.items() if k != "context_size"
-            }
-            payload.update(provider_opts)
-
+            payload = self._build_payload(messages, tools, tool_choice, stream=False)
            response = requests.post(
                f"{self.provider}/v1/chat/completions",
                json=payload,
@@ -154,60 +202,13 @@ class LlamaCppClient(GenAIClient):
            )
            response.raise_for_status()
            result = response.json()
 
            if result is None or "choices" not in result or len(result["choices"]) == 0:
                return {
                    "content": None,
                    "tool_calls": None,
                    "finish_reason": "error",
                }
 
-            choice = result["choices"][0]
-            message = choice.get("message", {})
-
-            content = message.get("content")
-            if content:
-                content = content.strip()
-            else:
-                content = None
-
-            tool_calls = None
-            if "tool_calls" in message and message["tool_calls"]:
-                tool_calls = []
-                for tool_call in message["tool_calls"]:
-                    try:
-                        function_data = tool_call.get("function", {})
-                        arguments_str = function_data.get("arguments", "{}")
-                        arguments = json.loads(arguments_str)
-                    except (json.JSONDecodeError, KeyError, TypeError) as e:
-                        logger.warning(
-                            f"Failed to parse tool call arguments: {e}, "
-                            f"tool: {function_data.get('name', 'unknown')}"
-                        )
-                        arguments = {}
-
-                    tool_calls.append(
-                        {
-                            "id": tool_call.get("id", ""),
-                            "name": function_data.get("name", ""),
-                            "arguments": arguments,
-                        }
-                    )
-
-            finish_reason = "error"
-            if "finish_reason" in choice and choice["finish_reason"]:
-                finish_reason = choice["finish_reason"]
-            elif tool_calls:
-                finish_reason = "tool_calls"
-            elif content:
-                finish_reason = "stop"
-
-            return {
-                "content": content,
-                "tool_calls": tool_calls,
-                "finish_reason": finish_reason,
-            }
-
+            return self._message_from_choice(result["choices"][0])
        except requests.exceptions.Timeout as e:
            logger.warning("llama.cpp request timed out: %s", str(e))
            return {
@@ -219,8 +220,7 @@ class LlamaCppClient(GenAIClient):
            error_detail = str(e)
            if hasattr(e, "response") and e.response is not None:
                try:
-                    error_body = e.response.text
-                    error_detail = f"{str(e)} - Response: {error_body[:500]}"
+                    error_detail = f"{str(e)} - Response: {e.response.text[:500]}"
                except Exception:
                    pass
            logger.warning("llama.cpp returned an error: %s", error_detail)
@@ -236,3 +236,106 @@ class LlamaCppClient(GenAIClient):
                "tool_calls": None,
                "finish_reason": "error",
            }
+
+    async def chat_with_tools_stream(
+        self,
+        messages: list[dict[str, Any]],
+        tools: Optional[list[dict[str, Any]]] = None,
+        tool_choice: Optional[str] = "auto",
+    ):
+        """Stream chat with tools via OpenAI-compatible streaming API."""
+        if self.provider is None:
+            logger.warning(
+                "llama.cpp provider has not been initialized. Check your llama.cpp configuration."
+            )
+            yield (
+                "message",
+                {
+                    "content": None,
+                    "tool_calls": None,
+                    "finish_reason": "error",
+                },
+            )
+            return
+        try:
+            payload = self._build_payload(messages, tools, tool_choice, stream=True)
+            content_parts: list[str] = []
+            tool_calls_by_index: dict[int, dict[str, Any]] = {}
+            finish_reason = "stop"
+
+            async with httpx.AsyncClient(timeout=float(self.timeout)) as client:
+                async with client.stream(
+                    "POST",
+                    f"{self.provider}/v1/chat/completions",
+                    json=payload,
+                ) as response:
+                    response.raise_for_status()
+                    async for line in response.aiter_lines():
+                        if not line.startswith("data: "):
+                            continue
+                        data_str = line[6:].strip()
+                        if data_str == "[DONE]":
+                            break
+                        try:
+                            data = json.loads(data_str)
+                        except json.JSONDecodeError:
+                            continue
+                        choices = data.get("choices") or []
+                        if not choices:
+                            continue
+                        delta = choices[0].get("delta", {})
+                        if choices[0].get("finish_reason"):
+                            finish_reason = choices[0]["finish_reason"]
+                        if delta.get("content"):
+                            content_parts.append(delta["content"])
+                            yield ("content_delta", delta["content"])
+                        for tc in delta.get("tool_calls") or []:
+                            idx = tc.get("index", 0)
+                            if idx not in tool_calls_by_index:
+                                tool_calls_by_index[idx] = {
+                                    "id": tc.get("id", ""),
+                                    "name": tc.get("name", ""),
+                                    "arguments": "",
+                                }
+                            t = tool_calls_by_index[idx]
+                            if tc.get("id"):
+                                t["id"] = tc["id"]
+                            if tc.get("name"):
+                                t["name"] = tc["name"]
+                            if tc.get("arguments"):
+                                t["arguments"] += tc["arguments"]
+
+            full_content = "".join(content_parts).strip() or None
+            tool_calls_list = self._streamed_tool_calls_to_list(tool_calls_by_index)
+            if tool_calls_list:
+                finish_reason = "tool_calls"
+            yield (
+                "message",
+                {
+                    "content": full_content,
+                    "tool_calls": tool_calls_list,
+                    "finish_reason": finish_reason,
+                },
+            )
+        except httpx.HTTPStatusError as e:
+            logger.warning("llama.cpp streaming HTTP error: %s", e)
+            yield (
+                "message",
+                {
+                    "content": None,
+                    "tool_calls": None,
+                    "finish_reason": "error",
+                },
+            )
+        except Exception as e:
+            logger.warning(
+                "Unexpected error in llama.cpp chat_with_tools_stream: %s", str(e)
+            )
+            yield (
+                "message",
+                {
+                    "content": None,
+                    "tool_calls": None,
+                    "finish_reason": "error",
+                },
+            )
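One subtlety in chat_with_tools_stream above: OpenAI-style servers split a single tool call across many stream chunks, keyed by index, with the arguments JSON arriving as string fragments that only parse once concatenated. A self-contained illustration of the same folding logic (the deltas are invented, and use the flat {id, name, arguments} delta shape this client reads):

import json

# Invented stream deltas: one tool call whose arguments arrive in two pieces.
deltas = [
    {"index": 0, "id": "call_1", "name": "search_objects", "arguments": '{"label'},
    {"index": 0, "arguments": '": "person"}'},
]

tool_calls_by_index: dict[int, dict] = {}
for tc in deltas:
    idx = tc.get("index", 0)
    slot = tool_calls_by_index.setdefault(
        idx, {"id": "", "name": "", "arguments": ""}
    )
    # Later chunks may repeat or omit id/name; arguments always concatenate.
    if tc.get("id"):
        slot["id"] = tc["id"]
    if tc.get("name"):
        slot["name"] = tc["name"]
    if tc.get("arguments"):
        slot["arguments"] += tc["arguments"]

for slot in tool_calls_by_index.values():
    print(slot["name"], json.loads(slot["arguments"]))
# search_objects {'label': 'person'}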
@@ -1,15 +1,16 @@
 """Ollama Provider for Frigate AI."""
 
-import json
 import logging
 from typing import Any, Optional
 
 from httpx import RemoteProtocolError, TimeoutException
+from ollama import AsyncClient as OllamaAsyncClient
 from ollama import Client as ApiClient
 from ollama import ResponseError
 
 from frigate.config import GenAIProviderEnum
 from frigate.genai import GenAIClient, register_genai_provider
+from frigate.genai.utils import parse_tool_calls_from_message
 
 logger = logging.getLogger(__name__)
 
@@ -88,6 +89,73 @@ class OllamaClient(GenAIClient):
            "num_ctx", 4096
        )
 
+    def _build_request_params(
+        self,
+        messages: list[dict[str, Any]],
+        tools: Optional[list[dict[str, Any]]],
+        tool_choice: Optional[str],
+        stream: bool = False,
+    ) -> dict[str, Any]:
+        """Build request_messages and params for chat (sync or stream)."""
+        request_messages = []
+        for msg in messages:
+            msg_dict = {
+                "role": msg.get("role"),
+                "content": msg.get("content", ""),
+            }
+            if msg.get("tool_call_id"):
+                msg_dict["tool_call_id"] = msg["tool_call_id"]
+            if msg.get("name"):
+                msg_dict["name"] = msg["name"]
+            if msg.get("tool_calls"):
+                msg_dict["tool_calls"] = msg["tool_calls"]
+            request_messages.append(msg_dict)
+
+        request_params: dict[str, Any] = {
+            "model": self.genai_config.model,
+            "messages": request_messages,
+            **self.provider_options,
+        }
+        if stream:
+            request_params["stream"] = True
+        if tools:
+            request_params["tools"] = tools
+        if tool_choice:
+            request_params["tool_choice"] = (
+                "none"
+                if tool_choice == "none"
+                else "required"
+                if tool_choice == "required"
+                else "auto"
+            )
+        return request_params
+
+    def _message_from_response(self, response: dict[str, Any]) -> dict[str, Any]:
+        """Parse Ollama chat response into {content, tool_calls, finish_reason}."""
+        if not response or "message" not in response:
+            return {
+                "content": None,
+                "tool_calls": None,
+                "finish_reason": "error",
+            }
+        message = response["message"]
+        content = message.get("content", "").strip() if message.get("content") else None
+        tool_calls = parse_tool_calls_from_message(message)
+        finish_reason = "error"
+        if response.get("done"):
+            finish_reason = (
+                "tool_calls" if tool_calls else "stop" if content else "error"
+            )
+        elif tool_calls:
+            finish_reason = "tool_calls"
+        elif content:
+            finish_reason = "stop"
+        return {
+            "content": content,
+            "tool_calls": tool_calls,
+            "finish_reason": finish_reason,
+        }
+
    def chat_with_tools(
        self,
        messages: list[dict[str, Any]],
@@ -103,93 +171,12 @@ class OllamaClient(GenAIClient):
                "tool_calls": None,
                "finish_reason": "error",
            }
 
        try:
-            request_messages = []
-            for msg in messages:
-                msg_dict = {
-                    "role": msg.get("role"),
-                    "content": msg.get("content", ""),
-                }
-                if msg.get("tool_call_id"):
-                    msg_dict["tool_call_id"] = msg["tool_call_id"]
-                if msg.get("name"):
-                    msg_dict["name"] = msg["name"]
-                if msg.get("tool_calls"):
-                    msg_dict["tool_calls"] = msg["tool_calls"]
-                request_messages.append(msg_dict)
-
-            request_params = {
-                "model": self.genai_config.model,
-                "messages": request_messages,
-            }
-
-            if tools:
-                request_params["tools"] = tools
-            if tool_choice:
-                if tool_choice == "none":
-                    request_params["tool_choice"] = "none"
-                elif tool_choice == "required":
-                    request_params["tool_choice"] = "required"
-                elif tool_choice == "auto":
-                    request_params["tool_choice"] = "auto"
-
-            request_params.update(self.provider_options)
-
-            response = self.provider.chat(**request_params)
-
-            if not response or "message" not in response:
-                return {
-                    "content": None,
-                    "tool_calls": None,
-                    "finish_reason": "error",
-                }
-
-            message = response["message"]
-            content = (
-                message.get("content", "").strip() if message.get("content") else None
-            )
-
-            tool_calls = None
-            if "tool_calls" in message and message["tool_calls"]:
-                tool_calls = []
-                for tool_call in message["tool_calls"]:
-                    try:
-                        function_data = tool_call.get("function", {})
-                        arguments_str = function_data.get("arguments", "{}")
-                        arguments = json.loads(arguments_str)
-                    except (json.JSONDecodeError, KeyError, TypeError) as e:
-                        logger.warning(
-                            f"Failed to parse tool call arguments: {e}, "
-                            f"tool: {function_data.get('name', 'unknown')}"
-                        )
-                        arguments = {}
-
-                    tool_calls.append(
-                        {
-                            "id": tool_call.get("id", ""),
-                            "name": function_data.get("name", ""),
-                            "arguments": arguments,
-                        }
-                    )
-
-            finish_reason = "error"
-            if "done" in response and response["done"]:
-                if tool_calls:
-                    finish_reason = "tool_calls"
-                elif content:
-                    finish_reason = "stop"
-            elif tool_calls:
-                finish_reason = "tool_calls"
-            elif content:
-                finish_reason = "stop"
-
-            return {
-                "content": content,
-                "tool_calls": tool_calls,
-                "finish_reason": finish_reason,
-            }
-
+            request_params = self._build_request_params(
+                messages, tools, tool_choice, stream=False
+            )
+            response = self.provider.chat(**request_params)
+            return self._message_from_response(response)
        except (TimeoutException, ResponseError, ConnectionError) as e:
            logger.warning("Ollama returned an error: %s", str(e))
            return {
@@ -204,3 +191,89 @@ class OllamaClient(GenAIClient):
                "tool_calls": None,
                "finish_reason": "error",
            }
+
+    async def chat_with_tools_stream(
+        self,
+        messages: list[dict[str, Any]],
+        tools: Optional[list[dict[str, Any]]] = None,
+        tool_choice: Optional[str] = "auto",
+    ):
+        """Stream chat with tools; yields content deltas then final message."""
+        if self.provider is None:
+            logger.warning(
+                "Ollama provider has not been initialized. Check your Ollama configuration."
+            )
+            yield (
+                "message",
+                {
+                    "content": None,
+                    "tool_calls": None,
+                    "finish_reason": "error",
+                },
+            )
+            return
+        try:
+            request_params = self._build_request_params(
+                messages, tools, tool_choice, stream=True
+            )
+            async_client = OllamaAsyncClient(
+                host=self.genai_config.base_url,
+                timeout=self.timeout,
+            )
+            content_parts: list[str] = []
+            final_message: dict[str, Any] | None = None
+            try:
+                stream = await async_client.chat(**request_params)
+                async for chunk in stream:
+                    if not chunk or "message" not in chunk:
+                        continue
+                    msg = chunk.get("message", {})
+                    delta = msg.get("content") or ""
+                    if delta:
+                        content_parts.append(delta)
+                        yield ("content_delta", delta)
+                    if chunk.get("done"):
+                        full_content = "".join(content_parts).strip() or None
+                        tool_calls = parse_tool_calls_from_message(msg)
+                        final_message = {
+                            "content": full_content,
+                            "tool_calls": tool_calls,
+                            "finish_reason": "tool_calls" if tool_calls else "stop",
+                        }
+                        break
+            finally:
+                await async_client.close()
+
+            if final_message is not None:
+                yield ("message", final_message)
+            else:
+                yield (
+                    "message",
+                    {
+                        "content": "".join(content_parts).strip() or None,
+                        "tool_calls": None,
+                        "finish_reason": "stop",
+                    },
+                )
+        except (TimeoutException, ResponseError, ConnectionError) as e:
+            logger.warning("Ollama streaming error: %s", str(e))
+            yield (
+                "message",
+                {
+                    "content": None,
+                    "tool_calls": None,
+                    "finish_reason": "error",
+                },
+            )
+        except Exception as e:
+            logger.warning(
+                "Unexpected error in Ollama chat_with_tools_stream: %s", str(e)
+            )
+            yield (
+                "message",
+                {
+                    "content": None,
+                    "tool_calls": None,
+                    "finish_reason": "error",
+                },
+            )
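Both providers now expose the same async-generator contract, which is what the hasattr(genai_client, "chat_with_tools_stream") check in the chat endpoint keys on: zero or more ("content_delta", str) tuples, then exactly one ("message", {content, tool_calls, finish_reason}). A minimal sketch of a consumer under that contract (client construction elided; any object with this method works):

async def drain_stream(genai_client, conversation, tools):
    # Print deltas as they arrive; return the final structured message,
    # which is what drives the tool-call loop in the chat endpoint.
    final = None
    async for kind, value in genai_client.chat_with_tools_stream(
        messages=conversation, tools=tools, tool_choice="auto"
    ):
        if kind == "content_delta":
            print(value, end="", flush=True)
        elif kind == "message":
            final = value
    return final  # {"content": ..., "tool_calls": ..., "finish_reason": ...}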
frigate/genai/utils.py (new file, +70)
@@ -0,0 +1,70 @@
+"""Shared helpers for GenAI providers and chat (OpenAI-style messages, tool call parsing)."""
+
+import json
+import logging
+from typing import Any, List, Optional
+
+logger = logging.getLogger(__name__)
+
+
+def parse_tool_calls_from_message(
+    message: dict[str, Any],
+) -> Optional[list[dict[str, Any]]]:
+    """
+    Parse tool_calls from an OpenAI-style message dict.
+
+    Message may have "tool_calls" as a list of:
+    {"id": str, "function": {"name": str, "arguments": str}, ...}
+
+    Returns a list of {"id", "name", "arguments"} with arguments parsed as dict,
+    or None if no tool_calls. Used by Ollama and LlamaCpp (non-stream) responses.
+    """
+    raw = message.get("tool_calls")
+    if not raw or not isinstance(raw, list):
+        return None
+    result = []
+    for tool_call in raw:
+        function_data = tool_call.get("function") or {}
+        try:
+            arguments_str = function_data.get("arguments") or "{}"
+            arguments = json.loads(arguments_str)
+        except (json.JSONDecodeError, KeyError, TypeError) as e:
+            logger.warning(
+                "Failed to parse tool call arguments: %s, tool: %s",
+                e,
+                function_data.get("name", "unknown"),
+            )
+            arguments = {}
+        result.append(
+            {
+                "id": tool_call.get("id", ""),
+                "name": function_data.get("name", ""),
+                "arguments": arguments,
+            }
+        )
+    return result if result else None
+
+
+def build_assistant_message_for_conversation(
+    content: Any,
+    tool_calls_raw: Optional[List[dict[str, Any]]],
+) -> dict[str, Any]:
+    """
+    Build the assistant message dict in OpenAI format for appending to a conversation.
+
+    tool_calls_raw: list of {"id", "name", "arguments"} (arguments as dict), or None.
+    """
+    msg: dict[str, Any] = {"role": "assistant", "content": content}
+    if tool_calls_raw:
+        msg["tool_calls"] = [
+            {
+                "id": tc["id"],
+                "type": "function",
+                "function": {
+                    "name": tc["name"],
+                    "arguments": json.dumps(tc.get("arguments") or {}),
+                },
+            }
+            for tc in tool_calls_raw
+        ]
+    return msg
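The two helpers are inverses around the wire format: parse_tool_calls_from_message flattens a provider message into {id, name, arguments} dicts with arguments decoded, and build_assistant_message_for_conversation re-serializes them (arguments back to a JSON string) for the conversation history. A worked example with an invented call:

from frigate.genai.utils import (
    build_assistant_message_for_conversation,
    parse_tool_calls_from_message,
)

# Invented OpenAI-style provider message.
message = {
    "tool_calls": [
        {
            "id": "call_1",
            "function": {
                "name": "search_objects",
                "arguments": '{"camera": "front"}',
            },
        }
    ]
}

calls = parse_tool_calls_from_message(message)
# [{'id': 'call_1', 'name': 'search_objects', 'arguments': {'camera': 'front'}}]

assistant = build_assistant_message_for_conversation(None, calls)
# {'role': 'assistant', 'content': None, 'tool_calls': [{'id': 'call_1',
#  'type': 'function', 'function': {'name': 'search_objects',
#  'arguments': '{"camera": "front"}'}}]}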
@@ -62,6 +62,7 @@ export default function ChatPage() {
     setMessages((prev) => [...prev, assistantMessage]);
 
     let buffer = "";
+    let hadStreamError = false;
     for (;;) {
       const { done, value } = await reader.read();
       if (done) break;
@@ -81,6 +82,14 @@ export default function ChatPage() {
        } catch {
          continue;
        }
+        if (data.type === "error" && "error" in data) {
+          setError((data as { error?: string }).error ?? t("error"));
+          setMessages((prev) =>
+            prev.filter((m) => !(m.role === "assistant" && m.content === "")),
+          );
+          hadStreamError = true;
+          break;
+        }
        if (data.type === "tool_calls" && data.tool_calls?.length) {
          setMessages((prev) => {
            const next = [...prev];
@@ -105,8 +114,11 @@ export default function ChatPage() {
          });
        }
      }
+      if (hadStreamError) break;
    }
-    if (buffer.trim()) {
+    if (hadStreamError) {
+      // already set error and cleaned up
+    } else if (buffer.trim()) {
      try {
        const data = JSON.parse(buffer.trim()) as {
          type: string;
@@ -130,13 +142,15 @@ export default function ChatPage() {
        }
      }
 
-      setMessages((prev) => {
-        const next = [...prev];
-        const last = next[next.length - 1];
-        if (last?.role === "assistant" && last.content === "")
-          next[next.length - 1] = { ...last, content: " " };
-        return next;
-      });
+      if (!hadStreamError) {
+        setMessages((prev) => {
+          const next = [...prev];
+          const last = next[next.length - 1];
+          if (last?.role === "assistant" && last.content === "")
+            next[next.length - 1] = { ...last, content: " " };
+          return next;
+        });
+      }
    } catch {
      setError(t("error"));
      setMessages((prev) =>
@@ -4,7 +4,7 @@ import { defineConfig } from "vite";
 import react from "@vitejs/plugin-react-swc";
 import monacoEditorPlugin from "vite-plugin-monaco-editor";
 
-const proxyHost = process.env.PROXY_HOST || "localhost:5000";
+const proxyHost = process.env.PROXY_HOST || "192.168.50.106:5002";
 
 // https://vitejs.dev/config/
 export default defineConfig({