Add embed API support

Nicolas Mowen
2026-02-19 08:13:50 -07:00
parent 266e243425
commit e013a0206a
2 changed files with 112 additions and 0 deletions


@@ -7,6 +7,7 @@ import os
import re
from typing import Any, Optional

import numpy as np
from playhouse.shortcuts import model_to_dict

from frigate.config import CameraConfig, FrigateConfig, GenAIConfig, GenAIProviderEnum
@@ -304,6 +305,25 @@ Guidelines:
        """Get the context window size for this provider in tokens."""
        return 4096

    def embed(
        self,
        texts: list[str] | None = None,
        images: list[bytes] | None = None,
    ) -> list[np.ndarray]:
        """Generate embeddings for text and/or images.

        Returns list of numpy arrays (one per input). Expected dimension is 768
        for Frigate semantic search compatibility.

        Providers that support embeddings should override this method.
        """
        logger.warning(
            "%s does not support embeddings. "
            "This method should be overridden by the provider implementation.",
            self.__class__.__name__,
        )
        return []

    def chat_with_tools(
        self,
        messages: list[dict[str, Any]],

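For reference, the base-class method above is a deliberate no-op: it logs a warning and returns an empty list, and providers that support embeddings are expected to override it. A minimal sketch of such an override follows; ExampleClient and its _request_embeddings helper are hypothetical illustrations, not part of this commit:

import numpy as np


class ExampleClient(GenAIClient):
    def embed(
        self,
        texts: list[str] | None = None,
        images: list[bytes] | None = None,
    ) -> list[np.ndarray]:
        # Query the provider's embedding endpoint (hypothetical helper) and
        # return one float32 vector per input; the base-class docstring
        # expects 768 dimensions for semantic search compatibility.
        raw = self._request_embeddings(texts or [], images or [])
        return [np.asarray(v, dtype=np.float32) for v in raw]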

@@ -5,6 +5,7 @@ import json
import logging
from typing import Any, Optional

import numpy as np
import requests

from frigate.config import GenAIProviderEnum
@@ -101,6 +102,97 @@ class LlamaCppClient(GenAIClient):
        """Get the context window size for llama.cpp."""
        return self.genai_config.provider_options.get("context_size", 4096)

    def embed(
        self,
        texts: list[str] | None = None,
        images: list[bytes] | None = None,
    ) -> list[np.ndarray]:
        """Generate embeddings via llama.cpp /embeddings endpoint.

        Supports batch requests. Uses content format with prompt_string and
        multimodal_data for images (PR #15108). Server must be started with
        --embeddings and --mmproj for multimodal support.
        """
        if self.provider is None:
            logger.warning(
                "llama.cpp provider has not been initialized. Check your llama.cpp configuration."
            )
            return []

        texts = texts or []
        images = images or []

        if not texts and not images:
            return []

        EMBEDDING_DIM = 768

        content = []
        for text in texts:
            content.append({"prompt_string": text})
        for img in images:
            encoded = base64.b64encode(img).decode("utf-8")
            content.append({"prompt_string": "", "multimodal_data": [encoded]})

        try:
            response = requests.post(
                f"{self.provider}/embeddings",
                json={"content": content},
                timeout=self.timeout,
            )
            response.raise_for_status()
            result = response.json()

            items = result.get("data", result) if isinstance(result, dict) else result

            if not isinstance(items, list):
                logger.warning("llama.cpp embeddings returned unexpected format")
                return []

            embeddings = []
            for item in items:
                emb = item.get("embedding") if isinstance(item, dict) else None

                if emb is None:
                    logger.warning("llama.cpp embeddings item missing embedding field")
                    continue

                arr = np.array(emb, dtype=np.float32)
                orig_dim = arr.size

                if orig_dim != EMBEDDING_DIM:
                    if orig_dim > EMBEDDING_DIM:
                        arr = arr[:EMBEDDING_DIM]
                        logger.debug(
                            "Truncated llama.cpp embedding from %d to %d dimensions",
                            orig_dim,
                            EMBEDDING_DIM,
                        )
                    else:
                        arr = np.pad(
                            arr,
                            (0, EMBEDDING_DIM - orig_dim),
                            mode="constant",
                            constant_values=0,
                        )
                        logger.debug(
                            "Padded llama.cpp embedding from %d to %d dimensions",
                            orig_dim,
                            EMBEDDING_DIM,
                        )

                embeddings.append(arr)

            return embeddings
        except requests.exceptions.Timeout:
            logger.warning("llama.cpp embeddings request timed out")
            return []
        except requests.exceptions.RequestException as e:
            error_detail = str(e)

            if hasattr(e, "response") and e.response is not None:
                try:
                    error_detail = f"{str(e)} - Response: {e.response.text[:500]}"
                except Exception:
                    pass

            logger.warning("llama.cpp embeddings error: %s", error_detail)
            return []
        except Exception as e:
            logger.warning("Unexpected error in llama.cpp embeddings: %s", str(e))
            return []

    def chat_with_tools(
        self,
        messages: list[dict[str, Any]],
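A usage sketch for the new method (the client variable and the image path are illustrative; the rest mirrors the code above):

embeddings = client.embed(
    texts=["person walking a dog"],
    images=[open("snapshot.jpg", "rb").read()],
)
# One float32 numpy array per input, truncated or zero-padded to 768 dims.
assert all(vec.shape == (768,) for vec in embeddings)

The request body this builds and POSTs to {provider}/embeddings pairs each text with a prompt_string entry and each image with an empty prompt_string plus base64 multimodal_data:

{
  "content": [
    {"prompt_string": "person walking a dog"},
    {"prompt_string": "", "multimodal_data": ["<base64-encoded image>"]}
  ]
}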