mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-02-20 13:54:36 +01:00
Add embed API support
This commit is contained in:
@@ -7,6 +7,7 @@ import os
|
||||
import re
|
||||
from typing import Any, Optional
|
||||
|
||||
import numpy as np
|
||||
from playhouse.shortcuts import model_to_dict
|
||||
|
||||
from frigate.config import CameraConfig, FrigateConfig, GenAIConfig, GenAIProviderEnum
|
||||
@@ -304,6 +305,25 @@ Guidelines:
|
||||
"""Get the context window size for this provider in tokens."""
|
||||
return 4096
|
||||
|
||||
def embed(
    self,
    texts: list[str] | None = None,
    images: list[bytes] | None = None,
) -> list[np.ndarray]:
    """Produce embedding vectors for the given texts and/or images.

    This is the fallback used by providers without embedding support: it
    logs a warning naming the concrete class and yields no embeddings.
    Providers that can embed are expected to override it and return one
    numpy array per input, with 768 dimensions for Frigate semantic
    search compatibility.

    Args:
        texts: Optional text inputs (ignored by this fallback).
        images: Optional raw image bytes (ignored by this fallback).

    Returns:
        Always an empty list.
    """
    provider_name = self.__class__.__name__
    no_embeddings: list[np.ndarray] = []

    logger.warning(
        "%s does not support embeddings. "
        "This method should be overridden by the provider implementation.",
        provider_name,
    )
    return no_embeddings
|
||||
|
||||
def chat_with_tools(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
|
||||
@@ -5,6 +5,7 @@ import json
|
||||
import logging
|
||||
from typing import Any, Optional
|
||||
|
||||
import numpy as np
|
||||
import requests
|
||||
|
||||
from frigate.config import GenAIProviderEnum
|
||||
@@ -101,6 +102,97 @@ class LlamaCppClient(GenAIClient):
|
||||
"""Get the context window size for llama.cpp."""
|
||||
return self.genai_config.provider_options.get("context_size", 4096)
|
||||
|
||||
def embed(
|
||||
self,
|
||||
texts: list[str] | None = None,
|
||||
images: list[bytes] | None = None,
|
||||
) -> list[np.ndarray]:
|
||||
"""Generate embeddings via llama.cpp /embeddings endpoint.
|
||||
|
||||
Supports batch requests. Uses content format with prompt_string and
|
||||
multimodal_data for images (PR #15108). Server must be started with
|
||||
--embeddings and --mmproj for multimodal support.
|
||||
"""
|
||||
if self.provider is None:
|
||||
logger.warning(
|
||||
"llama.cpp provider has not been initialized. Check your llama.cpp configuration."
|
||||
)
|
||||
return []
|
||||
|
||||
texts = texts or []
|
||||
images = images or []
|
||||
if not texts and not images:
|
||||
return []
|
||||
|
||||
EMBEDDING_DIM = 768
|
||||
|
||||
content = []
|
||||
for text in texts:
|
||||
content.append({"prompt_string": text})
|
||||
for img in images:
|
||||
encoded = base64.b64encode(img).decode("utf-8")
|
||||
content.append({"prompt_string": "", "multimodal_data": [encoded]})
|
||||
|
||||
try:
|
||||
response = requests.post(
|
||||
f"{self.provider}/embeddings",
|
||||
json={"content": content},
|
||||
timeout=self.timeout,
|
||||
)
|
||||
response.raise_for_status()
|
||||
result = response.json()
|
||||
|
||||
items = result.get("data", result) if isinstance(result, dict) else result
|
||||
if not isinstance(items, list):
|
||||
logger.warning("llama.cpp embeddings returned unexpected format")
|
||||
return []
|
||||
|
||||
embeddings = []
|
||||
for item in items:
|
||||
emb = item.get("embedding") if isinstance(item, dict) else None
|
||||
if emb is None:
|
||||
logger.warning("llama.cpp embeddings item missing embedding field")
|
||||
continue
|
||||
arr = np.array(emb, dtype=np.float32)
|
||||
orig_dim = arr.size
|
||||
if orig_dim != EMBEDDING_DIM:
|
||||
if orig_dim > EMBEDDING_DIM:
|
||||
arr = arr[:EMBEDDING_DIM]
|
||||
logger.debug(
|
||||
"Truncated llama.cpp embedding from %d to %d dimensions",
|
||||
orig_dim,
|
||||
EMBEDDING_DIM,
|
||||
)
|
||||
else:
|
||||
arr = np.pad(
|
||||
arr,
|
||||
(0, EMBEDDING_DIM - orig_dim),
|
||||
mode="constant",
|
||||
constant_values=0,
|
||||
)
|
||||
logger.debug(
|
||||
"Padded llama.cpp embedding from %d to %d dimensions",
|
||||
orig_dim,
|
||||
EMBEDDING_DIM,
|
||||
)
|
||||
embeddings.append(arr)
|
||||
return embeddings
|
||||
except requests.exceptions.Timeout:
|
||||
logger.warning("llama.cpp embeddings request timed out")
|
||||
return []
|
||||
except requests.exceptions.RequestException as e:
|
||||
error_detail = str(e)
|
||||
if hasattr(e, "response") and e.response is not None:
|
||||
try:
|
||||
error_detail = f"{str(e)} - Response: {e.response.text[:500]}"
|
||||
except Exception:
|
||||
pass
|
||||
logger.warning("llama.cpp embeddings error: %s", error_detail)
|
||||
return []
|
||||
except Exception as e:
|
||||
logger.warning("Unexpected error in llama.cpp embeddings: %s", str(e))
|
||||
return []
|
||||
|
||||
def chat_with_tools(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
|
||||
Reference in New Issue
Block a user