From 54eb03d2a1bc77846582444a0a364201034925c3 Mon Sep 17 00:00:00 2001
From: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com>
Date: Thu, 10 Oct 2024 17:46:21 -0500
Subject: [PATCH] Add config option to select fp16 or quantized jina vision
 model (#14270)

* Add config option to select fp16 or quantized jina vision model

* requires_fp16 for text and large models only

* fix model type check

* fix cpu

* pass model size
---
 docs/docs/configuration/reference.md       |  2 ++
 docs/docs/configuration/semantic_search.md | 10 +++++++++
 frigate/config/semantic_search.py          |  3 +++
 frigate/embeddings/embeddings.py           | 24 ++++++++++++++++------
 frigate/embeddings/functions/onnx.py       |  5 ++++-
 web/src/pages/Explore.tsx                  |  9 +++++---
 web/src/types/frigateConfig.ts             |  1 +
 7 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/docs/docs/configuration/reference.md b/docs/docs/configuration/reference.md
index 66e49fb7f..234478714 100644
--- a/docs/docs/configuration/reference.md
+++ b/docs/docs/configuration/reference.md
@@ -520,6 +520,8 @@ semantic_search:
   reindex: False
   # Optional: Set device used to run embeddings, options are AUTO, CPU, GPU. (default: shown below)
   device: "AUTO"
+  # Optional: Set the model size used for embeddings. (default: shown below)
+  model_size: "small"
 
 # Optional: Configuration for AI generated tracked object descriptions
 # NOTE: Semantic Search must be enabled for this to do anything.
diff --git a/docs/docs/configuration/semantic_search.md b/docs/docs/configuration/semantic_search.md
index 7cb8ca769..87ccbf802 100644
--- a/docs/docs/configuration/semantic_search.md
+++ b/docs/docs/configuration/semantic_search.md
@@ -39,6 +39,16 @@ The vision model is able to embed both images and text into the same vector spac
 
 The text model is used to embed tracked object descriptions and perform searches against them. Descriptions can be created, viewed, and modified on the Search page when clicking on the gray tracked object chip at the top left of each review item. See [the Generative AI docs](/configuration/genai.md) for more information on how to automatically generate tracked object descriptions.
 
+Differently weighted CLIP models are available and can be selected by setting the `model_size` config option:
+
+```yaml
+semantic_search:
+  enabled: True
+  model_size: small
+```
+
+Using `large` as the model size setting employs the full Jina model appropriate for high performance systems running a GPU. The `small` size uses a quantized version of the model that uses much less RAM and runs faster on CPU with a very negligible difference in embedding quality. Most users will not need to change this setting from the default of `small`.
+
 ## Usage
 
 1. Semantic search is used in conjunction with the other filters available on the Search page. Use a combination of traditional filtering and semantic search for the best results.
diff --git a/frigate/config/semantic_search.py b/frigate/config/semantic_search.py
index ecdcd12d1..fdaf0fff4 100644
--- a/frigate/config/semantic_search.py
+++ b/frigate/config/semantic_search.py
@@ -13,3 +13,6 @@ class SemanticSearchConfig(FrigateBaseModel):
         default=False, title="Reindex all detections on startup."
     )
     device: str = Field(default="AUTO", title="Device Type")
+    model_size: str = Field(
+        default="small", title="The size of the embeddings model used."
+    )
diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py
index e9d8ab833..5fe0566f2 100644
--- a/frigate/embeddings/embeddings.py
+++ b/frigate/embeddings/embeddings.py
@@ -68,7 +68,9 @@ class Embeddings:
         models = [
             "jinaai/jina-clip-v1-text_model_fp16.onnx",
             "jinaai/jina-clip-v1-tokenizer",
-            "jinaai/jina-clip-v1-vision_model_fp16.onnx",
+            "jinaai/jina-clip-v1-vision_model_fp16.onnx"
+            if config.model_size == "large"
+            else "jinaai/jina-clip-v1-vision_model_quantized.onnx",
             "jinaai/jina-clip-v1-preprocessor_config.json",
         ]
 
@@ -95,19 +97,29 @@ class Embeddings:
                 "text_model_fp16.onnx": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/onnx/text_model_fp16.onnx",
             },
             embedding_function=jina_text_embedding_function,
+            model_size=config.model_size,
             model_type="text",
             requestor=self.requestor,
             device="CPU",
         )
 
+        model_file = (
+            "vision_model_fp16.onnx"
+            if self.config.model_size == "large"
+            else "vision_model_quantized.onnx"
+        )
+
+        download_urls = {
+            model_file: f"https://huggingface.co/jinaai/jina-clip-v1/resolve/main/onnx/{model_file}",
+            "preprocessor_config.json": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/preprocessor_config.json",
+        }
+
         self.vision_embedding = GenericONNXEmbedding(
             model_name="jinaai/jina-clip-v1",
-            model_file="vision_model_fp16.onnx",
-            download_urls={
-                "vision_model_fp16.onnx": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/onnx/vision_model_fp16.onnx",
-                "preprocessor_config.json": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/preprocessor_config.json",
-            },
+            model_file=model_file,
+            download_urls=download_urls,
             embedding_function=jina_vision_embedding_function,
+            model_size=config.model_size,
             model_type="vision",
             requestor=self.requestor,
             device=self.config.device,
diff --git a/frigate/embeddings/functions/onnx.py b/frigate/embeddings/functions/onnx.py
index 34a81528a..ae9fe33bc 100644
--- a/frigate/embeddings/functions/onnx.py
+++ b/frigate/embeddings/functions/onnx.py
@@ -41,6 +41,7 @@ class GenericONNXEmbedding:
         model_file: str,
         download_urls: Dict[str, str],
         embedding_function: Callable[[List[np.ndarray]], np.ndarray],
+        model_size: str,
         model_type: str,
         requestor: InterProcessRequestor,
         tokenizer_file: Optional[str] = None,
@@ -54,7 +55,9 @@ class GenericONNXEmbedding:
         self.embedding_function = embedding_function
         self.model_type = model_type  # 'text' or 'vision'
         self.providers, self.provider_options = get_ort_providers(
-            force_cpu=device == "CPU", requires_fp16=True, openvino_device=device
+            force_cpu=device == "CPU",
+            requires_fp16=model_size == "large" or self.model_type == "text",
+            openvino_device=device,
         )
 
         self.download_path = os.path.join(MODEL_CACHE_DIR, self.model_name)
diff --git a/web/src/pages/Explore.tsx b/web/src/pages/Explore.tsx
index 8607c8760..59c3fd895 100644
--- a/web/src/pages/Explore.tsx
+++ b/web/src/pages/Explore.tsx
@@ -207,9 +207,12 @@ export default function Explore() {
   const { payload: textTokenizerState } = useModelState(
     "jinaai/jina-clip-v1-tokenizer",
   );
-  const { payload: visionModelState } = useModelState(
-    "jinaai/jina-clip-v1-vision_model_fp16.onnx",
-  );
+  const modelFile =
+    config?.semantic_search.model_size === "large"
+      ? "jinaai/jina-clip-v1-vision_model_fp16.onnx"
+      : "jinaai/jina-clip-v1-vision_model_quantized.onnx";
+
+  const { payload: visionModelState } = useModelState(modelFile);
   const { payload: visionFeatureExtractorState } = useModelState(
     "jinaai/jina-clip-v1-preprocessor_config.json",
   );
diff --git a/web/src/types/frigateConfig.ts b/web/src/types/frigateConfig.ts
index 68003f0e0..fe889ed9d 100644
--- a/web/src/types/frigateConfig.ts
+++ b/web/src/types/frigateConfig.ts
@@ -417,6 +417,7 @@ export interface FrigateConfig {
 
   semantic_search: {
     enabled: boolean;
+    model_size: string;
   };
 
   snapshots: {