Override onnx clip loading (#13800)

* Set caching options for hardware providers

* Always use CPU for searching

* Use new install strategy to remove onnxruntime and then install post wheels
Nicolas Mowen 2024-09-17 13:24:35 -06:00 committed by GitHub
parent 90d7fc6bc5
commit 2362d0e838
6 changed files with 82 additions and 7 deletions

docker/main/Dockerfile

@@ -170,8 +170,8 @@ RUN /build_pysqlite3.sh
 COPY docker/main/requirements-wheels.txt /requirements-wheels.txt
 RUN pip3 wheel --wheel-dir=/wheels -r /requirements-wheels.txt
-COPY docker/main/requirements-wheels-nodeps.txt /requirements-wheels-nodeps.txt
-RUN pip3 wheel --no-deps --wheel-dir=/wheels -r /requirements-wheels-nodeps.txt
+COPY docker/main/requirements-wheels-post.txt /requirements-wheels-post.txt
+RUN pip3 wheel --no-deps --wheel-dir=/wheels-post -r /requirements-wheels-post.txt

 # Collect deps in a single layer

@@ -215,6 +215,14 @@ RUN --mount=type=bind,from=wheels,source=/wheels,target=/deps/wheels \
     python3 -m pip install --upgrade pip && \
     pip3 install -U /deps/wheels/*.whl
+
+# We have to uninstall this dependency specifically
+# as it will break onnxruntime-openvino
+RUN pip3 uninstall -y onnxruntime
+
+RUN --mount=type=bind,from=wheels,source=/wheels-post,target=/deps/wheels \
+    python3 -m pip install --upgrade pip && \
+    pip3 install -U /deps/wheels/*.whl

 COPY --from=deps-rootfs / /
 RUN ldconfig
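
Note: the extra post-wheels pass is needed because the plain onnxruntime wheel (a transitive dependency) and onnxruntime-openvino install the same package, so the CPU build has to be removed before the replacement lands. A quick sanity check, as a sketch assuming it runs inside the built x86_64 image:

    import onnxruntime as ort

    # onnxruntime-openvino registers the OpenVINO provider; the plain CPU
    # wheel it replaces would typically list only CPUExecutionProvider.
    providers = ort.get_available_providers()
    print(providers)
    assert "OpenVINOExecutionProvider" in providers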

docker/main/requirements-wheels-nodeps.txt (deleted)

@@ -1 +0,0 @@
-onnx_clip == 4.0.*

docker/main/requirements-wheels-post.txt (new)

@@ -0,0 +1,3 @@
+# ONNX
+onnxruntime-openvino == 1.18.* ; platform_machine == 'x86_64'
+onnxruntime == 1.18.* ; platform_machine == 'aarch64'
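
The trailing conditions are standard PEP 508 environment markers: one file serves both architectures, with pip selecting the OpenVINO build on x86_64 and the plain build on aarch64. The platform_machine value pip evaluates comes from the stdlib, e.g.:

    import platform

    # PEP 508 defines platform_machine as platform.machine(); 'x86_64'
    # selects onnxruntime-openvino, 'aarch64' selects onnxruntime.
    print(platform.machine())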

docker/main/requirements-wheels.txt

@@ -30,10 +30,9 @@ ws4py == 0.5.*
 unidecode == 1.3.*
 # OpenVino & ONNX
 openvino == 2024.1.*
-onnxruntime-openvino == 1.18.* ; platform_machine == 'x86_64'
-onnxruntime == 1.18.* ; platform_machine == 'aarch64'
 # Embeddings
 chromadb == 0.5.0
+onnx_clip == 4.0.*
 # Generative AI
 google-generativeai == 0.6.*
 ollama == 0.2.*

frigate/embeddings/__init__.py

@@ -85,7 +85,10 @@ class Embeddings:
     @property
     def description(self) -> Collection:
         return self.client.get_or_create_collection(
-            name="event_description", embedding_function=MiniLMEmbedding()
+            name="event_description",
+            embedding_function=MiniLMEmbedding(
+                preferred_providers=["CPUExecutionProvider"]
+            ),
         )

     def reindex(self) -> None:
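
Forcing CPUExecutionProvider here matches the "Always use CPU for searching" note above: description searches stay off the GPU/accelerator providers. A minimal sketch of what that preference amounts to underneath, assuming MiniLMEmbedding forwards it to onnxruntime (model path hypothetical):

    import onnxruntime as ort

    # Hypothetical cache path; pinning providers to CPU keeps search
    # embeddings from competing with detection for the accelerator.
    session = ort.InferenceSession(
        "/config/model_cache/minilm/model.onnx",
        providers=["CPUExecutionProvider"],
    )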

frigate/embeddings/functions/clip.py

@@ -1,9 +1,13 @@
 """CLIP Embeddings for Frigate."""

+import errno
+import logging
 import os
+from pathlib import Path
 from typing import Tuple, Union

 import onnxruntime as ort
+import requests
 from chromadb import EmbeddingFunction, Embeddings
 from chromadb.api.types import (
     Documents,
@@ -39,10 +43,69 @@ class Clip(OnnxClip):
         models = []
         for model_file in [IMAGE_MODEL_FILE, TEXT_MODEL_FILE]:
             path = os.path.join(MODEL_CACHE_DIR, "clip", model_file)
-            models.append(OnnxClip._load_model(path, silent))
+            models.append(Clip._load_model(path, silent))

         return models[0], models[1]

+    @staticmethod
+    def _load_model(path: str, silent: bool):
+        providers = ort.get_available_providers()
+        options = []
+
+        for provider in providers:
+            if provider == "TensorrtExecutionProvider":
+                options.append(
+                    {
+                        "trt_timing_cache_enable": True,
+                        "trt_timing_cache_path": "/config/model_cache/tensorrt/ort",
+                        "trt_engine_cache_enable": True,
+                        "trt_engine_cache_path": "/config/model_cache/tensorrt/ort/trt-engines",
+                    }
+                )
+            elif provider == "OpenVINOExecutionProvider":
+                options.append({"cache_dir": "/config/model_cache/openvino/ort"})
+            else:
+                options.append({})
+
+        try:
+            if os.path.exists(path):
+                return ort.InferenceSession(
+                    path, providers=providers, provider_options=options
+                )
+            else:
+                raise FileNotFoundError(
+                    errno.ENOENT,
+                    os.strerror(errno.ENOENT),
+                    path,
+                )
+        except Exception:
+            s3_url = f"https://lakera-clip.s3.eu-west-1.amazonaws.com/{os.path.basename(path)}"
+            if not silent:
+                logging.info(
+                    f"The model file ({path}) doesn't exist "
+                    f"or it is invalid. Downloading it from the public S3 "
+                    f"bucket: {s3_url}."  # noqa: E501
+                )
+
+            # Download from S3
+            # Saving to a temporary file first to avoid corrupting the file
+            temporary_filename = Path(path).with_name(os.path.basename(path) + ".part")
+
+            # Create any missing directories in the path
+            temporary_filename.parent.mkdir(parents=True, exist_ok=True)
+
+            with requests.get(s3_url, stream=True) as r:
+                r.raise_for_status()
+                with open(temporary_filename, "wb") as f:
+                    for chunk in r.iter_content(chunk_size=8192):
+                        f.write(chunk)
+                    f.flush()
+
+            # Finally move the temporary file to the correct location
+            temporary_filename.rename(path)
+
+            return ort.InferenceSession(
+                path, providers=providers, provider_options=options
+            )
+

 class ClipEmbedding(EmbeddingFunction):
     """Embedding function for CLIP model used in Chroma."""