* fix ollama chat tool calling

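Handle tool call arguments that arrive as dicts, fall back to a non-streaming call when tools are provided, and convert stored tool calls to the message format Ollama expects.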

* pin setuptools<81 to ensure pkg_resources remains available

When ensure_torch_dependencies() installs torch/torchvision via pip, pip can upgrade setuptools to >=81.0.0, which removed the pkg_resources module. rknn-toolkit2 depends on pkg_resources internally, so any subsequent RKNN conversion fails with "No module named 'pkg_resources'". Pinning setuptools<81 in the same pip install command keeps pkg_resources available.
This commit is contained in:
Josh Hawkins
2026-03-05 14:11:32 -06:00
committed by GitHub
parent 2782931c72
commit 65db9b0aec
3 changed files with 74 additions and 24 deletions

View File

@@ -1,5 +1,6 @@
"""Ollama Provider for Frigate AI."""
import json
import logging
from typing import Any, Optional
@@ -108,7 +109,22 @@ class OllamaClient(GenAIClient):
if msg.get("name"):
msg_dict["name"] = msg["name"]
if msg.get("tool_calls"):
msg_dict["tool_calls"] = msg["tool_calls"]
# Ollama requires tool call arguments as dicts, but the
# conversation format (OpenAI-style) stores them as JSON
# strings. Convert back to dicts for Ollama.
ollama_tool_calls = []
for tc in msg["tool_calls"]:
func = tc.get("function") or {}
args = func.get("arguments") or {}
if isinstance(args, str):
try:
args = json.loads(args)
except (json.JSONDecodeError, TypeError):
args = {}
ollama_tool_calls.append(
{"function": {"name": func.get("name", ""), "arguments": args}}
)
msg_dict["tool_calls"] = ollama_tool_calls
request_messages.append(msg_dict)
request_params: dict[str, Any] = {
@@ -120,25 +136,27 @@ class OllamaClient(GenAIClient):
request_params["stream"] = True
if tools:
request_params["tools"] = tools
if tool_choice:
request_params["tool_choice"] = (
"none"
if tool_choice == "none"
else "required"
if tool_choice == "required"
else "auto"
)
return request_params
def _message_from_response(self, response: dict[str, Any]) -> dict[str, Any]:
"""Parse Ollama chat response into {content, tool_calls, finish_reason}."""
if not response or "message" not in response:
logger.debug("Ollama response empty or missing 'message' key")
return {
"content": None,
"tool_calls": None,
"finish_reason": "error",
}
message = response["message"]
logger.debug(
"Ollama response message keys: %s, content_len=%s, thinking_len=%s, "
"tool_calls=%s, done=%s",
list(message.keys()) if hasattr(message, "keys") else "N/A",
len(message.get("content", "") or "") if message.get("content") else 0,
len(message.get("thinking", "") or "") if message.get("thinking") else 0,
bool(message.get("tool_calls")),
response.get("done"),
)
content = message.get("content", "").strip() if message.get("content") else None
tool_calls = parse_tool_calls_from_message(message)
finish_reason = "error"
@@ -198,7 +216,13 @@ class OllamaClient(GenAIClient):
tools: Optional[list[dict[str, Any]]] = None,
tool_choice: Optional[str] = "auto",
):
"""Stream chat with tools; yields content deltas then final message."""
"""Stream chat with tools; yields content deltas then final message.
When tools are provided, Ollama streaming does not include tool_calls
in the response chunks. To work around this, we use a non-streaming
call when tools are present to ensure tool calls are captured, then
emit the content as a single delta followed by the final message.
"""
if self.provider is None:
logger.warning(
"Ollama provider has not been initialized. Check your Ollama configuration."
@@ -213,6 +237,27 @@ class OllamaClient(GenAIClient):
)
return
try:
# Ollama does not return tool_calls in streaming mode, so fall
# back to a non-streaming call when tools are provided.
if tools:
logger.debug(
"Ollama: tools provided, using non-streaming call for tool support"
)
request_params = self._build_request_params(
messages, tools, tool_choice, stream=False
)
async_client = OllamaAsyncClient(
host=self.genai_config.base_url,
timeout=self.timeout,
)
response = await async_client.chat(**request_params)
result = self._message_from_response(response)
content = result.get("content")
if content:
yield ("content_delta", content)
yield ("message", result)
return
request_params = self._build_request_params(
messages, tools, tool_choice, stream=True
)
@@ -233,11 +278,10 @@ class OllamaClient(GenAIClient):
yield ("content_delta", delta)
if chunk.get("done"):
full_content = "".join(content_parts).strip() or None
tool_calls = parse_tool_calls_from_message(msg)
final_message = {
"content": full_content,
"tool_calls": tool_calls,
"finish_reason": "tool_calls" if tool_calls else "stop",
"tool_calls": None,
"finish_reason": "stop",
}
break

View File

@@ -23,21 +23,26 @@ def parse_tool_calls_from_message(
if not raw or not isinstance(raw, list):
return None
result = []
for tool_call in raw:
for idx, tool_call in enumerate(raw):
function_data = tool_call.get("function") or {}
try:
arguments_str = function_data.get("arguments") or "{}"
arguments = json.loads(arguments_str)
except (json.JSONDecodeError, KeyError, TypeError) as e:
logger.warning(
"Failed to parse tool call arguments: %s, tool: %s",
e,
function_data.get("name", "unknown"),
)
raw_arguments = function_data.get("arguments") or {}
if isinstance(raw_arguments, dict):
arguments = raw_arguments
elif isinstance(raw_arguments, str):
try:
arguments = json.loads(raw_arguments)
except (json.JSONDecodeError, KeyError, TypeError) as e:
logger.warning(
"Failed to parse tool call arguments: %s, tool: %s",
e,
function_data.get("name", "unknown"),
)
arguments = {}
else:
arguments = {}
result.append(
{
"id": tool_call.get("id", ""),
"id": tool_call.get("id", "") or f"call_{idx}",
"name": function_data.get("name", ""),
"arguments": arguments,
}

View File

@@ -110,6 +110,7 @@ def ensure_torch_dependencies() -> bool:
"pip",
"install",
"--break-system-packages",
"setuptools<81",
"torch",
"torchvision",
],