Add config option to select fp16 or quantized jina vision model (#14270)

* Add config option to select fp16 or quantized jina vision model

* requires_fp16 for text and large models only

* fix model type check

* fix cpu

* pass model size
Josh Hawkins 2024-10-10 17:46:21 -05:00 committed by GitHub
parent dd6276e706
commit 54eb03d2a1
7 changed files with 44 additions and 10 deletions

View File

@@ -520,6 +520,8 @@ semantic_search:
reindex: False
# Optional: Set device used to run embeddings, options are AUTO, CPU, GPU. (default: shown below)
device: "AUTO"
# Optional: Set the model size used for embeddings. (default: shown below)
model_size: "small"
# Optional: Configuration for AI generated tracked object descriptions
# NOTE: Semantic Search must be enabled for this to do anything.

View File

@@ -39,6 +39,16 @@ The vision model is able to embed both images and text into the same vector space
The text model is used to embed tracked object descriptions and perform searches against them. Descriptions can be created, viewed, and modified on the Search page when clicking on the gray tracked object chip at the top left of each review item. See [the Generative AI docs](/configuration/genai.md) for more information on how to automatically generate tracked object descriptions.
Differently weighted CLIP models are available and can be selected by setting the `model_size` config option:
```yaml
semantic_search:
enabled: True
model_size: small
```
Using `large` as the model size employs the full Jina model, which is appropriate for high-performance systems running a GPU. The `small` size uses a quantized version of the model that uses much less RAM and runs faster on CPU, with a negligible difference in embedding quality. Most users will not need to change this setting from the default of `small`.
## Usage
1. Semantic search is used in conjunction with the other filters available on the Search page. Use a combination of traditional filtering and semantic search for the best results.

View File

@@ -13,3 +13,6 @@ class SemanticSearchConfig(FrigateBaseModel):
default=False, title="Reindex all detections on startup."
)
device: str = Field(default="AUTO", title="Device Type")
model_size: str = Field(
default="small", title="The size of the embeddings model used."
)
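
For context, here is a minimal standalone sketch of the resulting config model, using plain pydantic in place of `FrigateBaseModel` and assuming the `enabled` field that appears in the YAML example and the frontend `FrigateConfig` type:

```python
from pydantic import BaseModel, Field


class SemanticSearchConfig(BaseModel):
    # `enabled` is assumed from the YAML example and frontend type; it is not shown in this hunk.
    enabled: bool = Field(default=False, title="Enable semantic search.")
    reindex: bool = Field(default=False, title="Reindex all detections on startup.")
    device: str = Field(default="AUTO", title="Device Type")
    model_size: str = Field(
        default="small", title="The size of the embeddings model used."
    )


# The documented YAML maps directly onto the model fields.
cfg = SemanticSearchConfig(enabled=True, model_size="small")
assert cfg.model_size in ("small", "large")  # the two sizes referenced by this change
```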

View File

@@ -68,7 +68,9 @@ class Embeddings:
models = [
"jinaai/jina-clip-v1-text_model_fp16.onnx",
"jinaai/jina-clip-v1-tokenizer",
"jinaai/jina-clip-v1-vision_model_fp16.onnx",
"jinaai/jina-clip-v1-vision_model_fp16.onnx"
if config.model_size == "large"
else "jinaai/jina-clip-v1-vision_model_quantized.onnx",
"jinaai/jina-clip-v1-preprocessor_config.json",
]
@@ -95,19 +97,29 @@
"text_model_fp16.onnx": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/onnx/text_model_fp16.onnx",
},
embedding_function=jina_text_embedding_function,
model_size=config.model_size,
model_type="text",
requestor=self.requestor,
device="CPU",
)
model_file = (
"vision_model_fp16.onnx"
if self.config.model_size == "large"
else "vision_model_quantized.onnx"
)
download_urls = {
model_file: f"https://huggingface.co/jinaai/jina-clip-v1/resolve/main/onnx/{model_file}",
"preprocessor_config.json": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/preprocessor_config.json",
}
self.vision_embedding = GenericONNXEmbedding(
model_name="jinaai/jina-clip-v1",
model_file="vision_model_fp16.onnx",
download_urls={
"vision_model_fp16.onnx": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/onnx/vision_model_fp16.onnx",
"preprocessor_config.json": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/preprocessor_config.json",
},
model_file=model_file,
download_urls=download_urls,
embedding_function=jina_vision_embedding_function,
model_size=config.model_size,
model_type="vision",
requestor=self.requestor,
device=self.config.device,
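
The vision-model selection above boils down to a small pure mapping from `model_size` to the ONNX file and its download URLs. A sketch under that assumption (the helper name `vision_model_assets` is illustrative, not part of Frigate):

```python
def vision_model_assets(model_size: str) -> dict[str, str]:
    """Pick the Jina CLIP vision model file for the configured size and build its download URLs."""
    model_file = (
        "vision_model_fp16.onnx"
        if model_size == "large"
        else "vision_model_quantized.onnx"
    )
    base = "https://huggingface.co/jinaai/jina-clip-v1/resolve/main"
    return {
        model_file: f"{base}/onnx/{model_file}",
        "preprocessor_config.json": f"{base}/preprocessor_config.json",
    }


# The default "small" size pulls the quantized vision model; "large" pulls the fp16 model.
assert "vision_model_quantized.onnx" in vision_model_assets("small")
assert "vision_model_fp16.onnx" in vision_model_assets("large")
```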

View File

@@ -41,6 +41,7 @@ class GenericONNXEmbedding:
model_file: str,
download_urls: Dict[str, str],
embedding_function: Callable[[List[np.ndarray]], np.ndarray],
model_size: str,
model_type: str,
requestor: InterProcessRequestor,
tokenizer_file: Optional[str] = None,
@@ -54,7 +55,9 @@ class GenericONNXEmbedding:
self.embedding_function = embedding_function
self.model_type = model_type # 'text' or 'vision'
self.providers, self.provider_options = get_ort_providers(
force_cpu=device == "CPU", requires_fp16=True, openvino_device=device
force_cpu=device == "CPU",
requires_fp16=model_size == "large" or self.model_type == "text",
openvino_device=device,
)
self.download_path = os.path.join(MODEL_CACHE_DIR, self.model_name)
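
The new `requires_fp16` expression means only the text model (which this change still downloads exclusively as `text_model_fp16.onnx`) and the large vision model request fp16 support from the ONNX Runtime provider setup; the quantized small vision model does not. A sketch of that decision in isolation (`needs_fp16` is an illustrative name, not a Frigate function):

```python
def needs_fp16(model_type: str, model_size: str) -> bool:
    # Text embeddings only ship as an fp16 ONNX file here, so they always need fp16;
    # the vision model needs it only when the full "large" fp16 variant is selected.
    return model_type == "text" or model_size == "large"


assert needs_fp16("text", "small") is True
assert needs_fp16("vision", "large") is True
assert needs_fp16("vision", "small") is False  # quantized model runs without fp16 support
```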

View File

@@ -207,9 +207,12 @@ export default function Explore() {
const { payload: textTokenizerState } = useModelState(
"jinaai/jina-clip-v1-tokenizer",
);
const { payload: visionModelState } = useModelState(
"jinaai/jina-clip-v1-vision_model_fp16.onnx",
);
const modelFile =
config?.semantic_search.model_size === "large"
? "jinaai/jina-clip-v1-vision_model_fp16.onnx"
: "jinaai/jina-clip-v1-vision_model_quantized.onnx";
const { payload: visionModelState } = useModelState(modelFile);
const { payload: visionFeatureExtractorState } = useModelState(
"jinaai/jina-clip-v1-preprocessor_config.json",
);

View File

@@ -417,6 +417,7 @@ export interface FrigateConfig {
semantic_search: {
enabled: boolean;
model_size: string;
};
snapshots: {