Mirror of https://github.com/blakeblackshear/frigate.git
Model and genai fixes (#14481)
* disable mem arena in options for cpu only
* add try/except around ollama initialization
* update docs
parent 4bb420d049
commit 8364e68667
@@ -29,11 +29,15 @@ cameras:

 ## Ollama

-[Ollama](https://ollama.com/) allows you to self-host large language models and keep everything running locally. It provides a nice API over [llama.cpp](https://github.com/ggerganov/llama.cpp). It is highly recommended to host this server on a machine with an Nvidia graphics card, or on an Apple silicon Mac for best performance. Most of the 7b parameter 4-bit vision models will fit inside 8GB of VRAM. There is also a [docker container](https://hub.docker.com/r/ollama/ollama) available.
+[Ollama](https://ollama.com/) allows you to self-host large language models and keep everything running locally. It provides a nice API over [llama.cpp](https://github.com/ggerganov/llama.cpp). It is highly recommended to host this server on a machine with an Nvidia graphics card, or on an Apple silicon Mac for best performance. CPU inference is not recommended.
+
+Most of the 7b parameter 4-bit vision models will fit inside 8GB of VRAM. There is also a [docker container](https://hub.docker.com/r/ollama/ollama) available.
+
+Parallel requests also come with some caveats. See the [Ollama documentation](https://github.com/ollama/ollama/blob/main/docs/faq.md#how-does-ollama-handle-concurrent-requests).

 ### Supported Models

-You must use a vision capable model with Frigate. Current model variants can be found [in their model library](https://ollama.com/library). At the time of writing, this includes `llava`, `llava-llama3`, `llava-phi3`, and `moondream`.
+You must use a vision capable model with Frigate. Current model variants can be found [in their model library](https://ollama.com/library). At the time of writing, this includes `llava`, `llava-llama3`, `llava-phi3`, and `moondream`. Note that Frigate will not automatically download the model you specify in your config; you must download the model to your local instance of Ollama first.

 :::note
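The updated docs require the model to already be present on the Ollama server before Frigate starts. As a rough sketch (the host and the `llava` model name are placeholders, not values taken from this commit), the model can be pre-loaded with the same Python client the provider uses:

from ollama import Client

# Placeholder host and model; substitute whatever your Frigate config points at.
client = Client(host="http://localhost:11434")
client.pull("llava")           # download the model if it is not already present
print(client.show("llava"))    # confirm the model is now available locally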
@@ -21,12 +21,20 @@ class OllamaClient(GenAIClient):

     def _init_provider(self):
         """Initialize the client."""
-        client = ApiClient(host=self.genai_config.base_url, timeout=self.timeout)
-        response = client.pull(self.genai_config.model)
-        if response["status"] != "success":
-            logger.error("Failed to pull %s model from Ollama", self.genai_config.model)
-            return None
-        return client
+        try:
+            client = ApiClient(host=self.genai_config.base_url, timeout=self.timeout)
+            # ensure the model is available locally
+            response = client.show(self.genai_config.model)
+            if response.get("error"):
+                logger.error(
+                    "Ollama error: %s",
+                    response["error"],
+                )
+                return None
+            return client
+        except Exception as e:
+            logger.warning("Error initializing Ollama: %s", str(e))
+            return None

     def _send(self, prompt: str, images: list[bytes]) -> Optional[str]:
         """Submit a request to Ollama"""
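For illustration, here is the same fail-soft pattern as a standalone sketch outside the Frigate class (host, timeout, and model name are assumptions): an unreachable server or a missing model is logged instead of raising out of initialization.

import logging
from ollama import Client

logger = logging.getLogger(__name__)

def init_ollama(base_url="http://localhost:11434", model="llava", timeout=30):
    """Return a connected Ollama client, or None if the server or model is unavailable."""
    try:
        client = Client(host=base_url, timeout=timeout)
        response = client.show(model)  # errors out if the model has not been pulled
        if isinstance(response, dict) and response.get("error"):
            logger.error("Ollama error: %s", response["error"])
            return None
        return client
    except Exception as e:
        logger.warning("Error initializing Ollama: %s", e)
        return None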
@@ -20,7 +20,7 @@ def get_ort_providers(
             ["CPUExecutionProvider"],
             [
                 {
-                    "arena_extend_strategy": "kSameAsRequested",
+                    "enable_cpu_mem_arena": False,
                 }
             ],
         )
@@ -53,7 +53,7 @@ def get_ort_providers(
             providers.append(provider)
             options.append(
                 {
-                    "arena_extend_strategy": "kSameAsRequested",
+                    "enable_cpu_mem_arena": False,
                 }
             )
         else:
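Both hunks above swap the option dict attached to the CPU provider, so the options list returned by get_ort_providers stays parallel to the providers list. A minimal sketch of that pairing, assuming only the CPU entry needs an option (the helper name is illustrative, not Frigate's API):

import onnxruntime as ort

def pick_providers(force_cpu: bool = False) -> tuple[list[str], list[dict]]:
    # providers[i] is configured by options[i]; the two lists must stay in step.
    providers: list[str] = []
    options: list[dict] = []

    for provider in ort.get_available_providers():
        if force_cpu and provider != "CPUExecutionProvider":
            continue
        providers.append(provider)
        if provider == "CPUExecutionProvider":
            # avoid keeping a CPU memory arena alive for occasional inference
            options.append({"enable_cpu_mem_arena": False})
        else:
            options.append({})

    return providers, options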
@@ -85,12 +85,8 @@ class ONNXModelRunner:
         else:
             # Use ONNXRuntime
             self.type = "ort"
-            options = ort.SessionOptions()
-            if device == "CPU":
-                options.enable_cpu_mem_arena = False
             self.ort = ort.InferenceSession(
                 model_path,
-                sess_options=options,
                 providers=providers,
                 provider_options=options,
             )
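With the flag moved into provider options, the runner no longer builds a SessionOptions object at all. A rough sketch of the resulting call shape ("model.onnx" is a placeholder path, not a Frigate model); the session-level equivalent that the removed lines used would be sess_options.enable_cpu_mem_arena = False.

import onnxruntime as ort

providers = ["CPUExecutionProvider"]
provider_options = [{"enable_cpu_mem_arena": False}]  # one options dict per provider

# Placeholder model path; Frigate passes its own model_path here.
session = ort.InferenceSession(
    "model.onnx",
    providers=providers,
    provider_options=provider_options,
)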