Override onnx clip loading (#13800)

* Set caching options for hardware providers

* Always use CPU for searching

* Use new install strategy to remove onnxruntime and then install post wheels
This commit is contained in:
Nicolas Mowen 2024-09-17 13:24:35 -06:00 committed by GitHub
parent 90d7fc6bc5
commit 2362d0e838
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 82 additions and 7 deletions

View File

@ -170,8 +170,8 @@ RUN /build_pysqlite3.sh
COPY docker/main/requirements-wheels.txt /requirements-wheels.txt
RUN pip3 wheel --wheel-dir=/wheels -r /requirements-wheels.txt
COPY docker/main/requirements-wheels-nodeps.txt /requirements-wheels-nodeps.txt
RUN pip3 wheel --no-deps --wheel-dir=/wheels -r /requirements-wheels-nodeps.txt
COPY docker/main/requirements-wheels-post.txt /requirements-wheels-post.txt
RUN pip3 wheel --no-deps --wheel-dir=/wheels-post -r /requirements-wheels-post.txt
# Collect deps in a single layer
@ -215,6 +215,14 @@ RUN --mount=type=bind,from=wheels,source=/wheels,target=/deps/wheels \
python3 -m pip install --upgrade pip && \
pip3 install -U /deps/wheels/*.whl
# We have to uninstall this dependency specifically
# as it will break onnxruntime-openvino
RUN pip3 uninstall -y onnxruntime
RUN --mount=type=bind,from=wheels,source=/wheels-post,target=/deps/wheels \
python3 -m pip install --upgrade pip && \
pip3 install -U /deps/wheels/*.whl
COPY --from=deps-rootfs / /
RUN ldconfig

View File

@ -1 +0,0 @@
onnx_clip == 4.0.*

View File

@ -0,0 +1,3 @@
# ONNX
onnxruntime-openvino == 1.18.* ; platform_machine == 'x86_64'
onnxruntime == 1.18.* ; platform_machine == 'aarch64'

View File

@ -30,10 +30,9 @@ ws4py == 0.5.*
unidecode == 1.3.*
# OpenVino & ONNX
openvino == 2024.1.*
onnxruntime-openvino == 1.18.* ; platform_machine == 'x86_64'
onnxruntime == 1.18.* ; platform_machine == 'aarch64'
# Embeddings
chromadb == 0.5.0
onnx_clip == 4.0.*
# Generative AI
google-generativeai == 0.6.*
ollama == 0.2.*

View File

@ -85,7 +85,10 @@ class Embeddings:
@property
def description(self) -> Collection:
return self.client.get_or_create_collection(
name="event_description", embedding_function=MiniLMEmbedding()
name="event_description",
embedding_function=MiniLMEmbedding(
preferred_providers=["CPUExecutionProvider"]
),
)
def reindex(self) -> None:

View File

@ -1,9 +1,13 @@
"""CLIP Embeddings for Frigate."""
import errno
import logging
import os
from pathlib import Path
from typing import Tuple, Union
import onnxruntime as ort
import requests
from chromadb import EmbeddingFunction, Embeddings
from chromadb.api.types import (
Documents,
@ -39,10 +43,69 @@ class Clip(OnnxClip):
models = []
for model_file in [IMAGE_MODEL_FILE, TEXT_MODEL_FILE]:
path = os.path.join(MODEL_CACHE_DIR, "clip", model_file)
models.append(OnnxClip._load_model(path, silent))
models.append(Clip._load_model(path, silent))
return models[0], models[1]
@staticmethod
def _load_model(path: str, silent: bool):
    """Load an ONNX CLIP model from ``path``, downloading it from S3 if absent.

    Overrides ``OnnxClip._load_model`` so hardware execution providers get
    on-disk caching options (TensorRT timing/engine caches, OpenVINO model
    cache) instead of recompiling the model on every startup.

    Args:
        path: Filesystem location of the ``.onnx`` model file.
        silent: When False, log an info message before downloading.

    Returns:
        An ``ort.InferenceSession`` for the model at ``path``.

    Raises:
        requests.HTTPError: If the S3 download fails.
    """
    providers = ort.get_available_providers()
    options = []

    # Build per-provider option dicts positionally aligned with `providers`;
    # ORT matches provider_options[i] to providers[i].
    for provider in providers:
        if provider == "TensorrtExecutionProvider":
            options.append(
                {
                    "trt_timing_cache_enable": True,
                    "trt_timing_cache_path": "/config/model_cache/tensorrt/ort",
                    "trt_engine_cache_enable": True,
                    "trt_engine_cache_path": "/config/model_cache/tensorrt/ort/trt-engines",
                }
            )
        elif provider == "OpenVINOExecutionProvider":
            options.append({"cache_dir": "/config/model_cache/openvino/ort"})
        else:
            options.append({})

    try:
        if os.path.exists(path):
            return ort.InferenceSession(
                path, providers=providers, provider_options=options
            )
        else:
            raise FileNotFoundError(
                errno.ENOENT,
                os.strerror(errno.ENOENT),
                path,
            )
    except Exception:
        s3_url = f"https://lakera-clip.s3.eu-west-1.amazonaws.com/{os.path.basename(path)}"
        if not silent:
            logging.info(
                f"The model file ({path}) doesn't exist "
                f"or it is invalid. Downloading it from the public S3 "
                f"bucket: {s3_url}."  # noqa: E501
            )

        # Download from S3 to a temporary ".part" file first so an
        # interrupted download can't leave a corrupt model at the final path.
        temporary_filename = Path(path).with_name(os.path.basename(path) + ".part")
        # Create any missing directories in the path
        temporary_filename.parent.mkdir(parents=True, exist_ok=True)
        with requests.get(s3_url, stream=True) as r:
            r.raise_for_status()
            with open(temporary_filename, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
                    f.flush()
        # Finally move the temporary file to the correct location
        temporary_filename.rename(path)

        # Fix: the original passed `providers=provider` — the leftover loop
        # variable (a single provider string) — instead of the full list,
        # so a freshly downloaded model was loaded with the wrong providers.
        return ort.InferenceSession(
            path, providers=providers, provider_options=options
        )
class ClipEmbedding(EmbeddingFunction):
"""Embedding function for CLIP model used in Chroma."""