Override onnx clip loading (#13800)

* Set caching options for hardware providers

* Always use CPU for searching

* Use new install strategy to remove onnxruntime and then install post wheels
Nicolas Mowen 2024-09-17 13:24:35 -06:00 committed by GitHub
parent 90d7fc6bc5
commit 2362d0e838
6 changed files with 82 additions and 7 deletions

docker/main/Dockerfile

@@ -170,8 +170,8 @@ RUN /build_pysqlite3.sh
 COPY docker/main/requirements-wheels.txt /requirements-wheels.txt
 RUN pip3 wheel --wheel-dir=/wheels -r /requirements-wheels.txt
-COPY docker/main/requirements-wheels-nodeps.txt /requirements-wheels-nodeps.txt
-RUN pip3 wheel --no-deps --wheel-dir=/wheels -r /requirements-wheels-nodeps.txt
+COPY docker/main/requirements-wheels-post.txt /requirements-wheels-post.txt
+RUN pip3 wheel --no-deps --wheel-dir=/wheels-post -r /requirements-wheels-post.txt

 # Collect deps in a single layer

@@ -215,6 +215,14 @@ RUN --mount=type=bind,from=wheels,source=/wheels,target=/deps/wheels \
     python3 -m pip install --upgrade pip && \
     pip3 install -U /deps/wheels/*.whl
+
+# We have to uninstall this dependency specifically
+# as it will break onnxruntime-openvino
+RUN pip3 uninstall -y onnxruntime
+
+RUN --mount=type=bind,from=wheels,source=/wheels-post,target=/deps/wheels \
+    python3 -m pip install --upgrade pip && \
+    pip3 install -U /deps/wheels/*.whl

 COPY --from=deps-rootfs / /
 RUN ldconfig
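
Note: the extra post-wheels pass is needed because the plain onnxruntime wheel (a transitive dependency) and onnxruntime-openvino install the same package, so the CPU build has to be removed before the replacement lands. A quick sanity check, as a sketch assuming it runs inside the built x86_64 image:

    import onnxruntime as ort

    # onnxruntime-openvino registers the OpenVINO provider; the plain CPU
    # wheel it replaces would typically list only CPUExecutionProvider.
    providers = ort.get_available_providers()
    print(providers)
    assert "OpenVINOExecutionProvider" in providers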

docker/main/requirements-wheels-nodeps.txt (deleted)

@@ -1 +0,0 @@
-onnx_clip == 4.0.*

docker/main/requirements-wheels-post.txt (new)

@@ -0,0 +1,3 @@
+# ONNX
+onnxruntime-openvino == 1.18.* ; platform_machine == 'x86_64'
+onnxruntime == 1.18.* ; platform_machine == 'aarch64'
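
The trailing conditions are standard PEP 508 environment markers: one file serves both architectures, with pip selecting the OpenVINO build on x86_64 and the plain build on aarch64. The platform_machine value pip evaluates comes from the stdlib, e.g.:

    import platform

    # PEP 508 defines platform_machine as platform.machine(); 'x86_64'
    # selects onnxruntime-openvino, 'aarch64' selects onnxruntime.
    print(platform.machine())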

docker/main/requirements-wheels.txt

@@ -30,10 +30,9 @@ ws4py == 0.5.*
 unidecode == 1.3.*
 # OpenVino & ONNX
 openvino == 2024.1.*
-onnxruntime-openvino == 1.18.* ; platform_machine == 'x86_64'
-onnxruntime == 1.18.* ; platform_machine == 'aarch64'
 # Embeddings
 chromadb == 0.5.0
+onnx_clip == 4.0.*
 # Generative AI
 google-generativeai == 0.6.*
 ollama == 0.2.*

frigate/embeddings/__init__.py

@@ -85,7 +85,10 @@ class Embeddings:
     @property
     def description(self) -> Collection:
         return self.client.get_or_create_collection(
-            name="event_description", embedding_function=MiniLMEmbedding()
+            name="event_description",
+            embedding_function=MiniLMEmbedding(
+                preferred_providers=["CPUExecutionProvider"]
+            ),
         )

     def reindex(self) -> None:
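
Forcing CPUExecutionProvider here matches the "Always use CPU for searching" note above: description searches stay off the GPU/accelerator providers. A minimal sketch of what that preference amounts to underneath, assuming MiniLMEmbedding forwards it to onnxruntime (model path hypothetical):

    import onnxruntime as ort

    # Hypothetical cache path; pinning providers to CPU keeps search
    # embeddings from competing with detection for the accelerator.
    session = ort.InferenceSession(
        "/config/model_cache/minilm/model.onnx",
        providers=["CPUExecutionProvider"],
    )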

frigate/embeddings/functions/clip.py

@@ -1,9 +1,13 @@
 """CLIP Embeddings for Frigate."""

+import errno
+import logging
 import os
+from pathlib import Path
 from typing import Tuple, Union

 import onnxruntime as ort
+import requests
 from chromadb import EmbeddingFunction, Embeddings
 from chromadb.api.types import (
     Documents,
@@ -39,10 +43,69 @@ class Clip(OnnxClip):
         models = []
         for model_file in [IMAGE_MODEL_FILE, TEXT_MODEL_FILE]:
             path = os.path.join(MODEL_CACHE_DIR, "clip", model_file)
-            models.append(OnnxClip._load_model(path, silent))
+            models.append(Clip._load_model(path, silent))

         return models[0], models[1]

+    @staticmethod
+    def _load_model(path: str, silent: bool):
+        providers = ort.get_available_providers()
+        options = []
+
+        for provider in providers:
+            if provider == "TensorrtExecutionProvider":
+                options.append(
+                    {
+                        "trt_timing_cache_enable": True,
+                        "trt_timing_cache_path": "/config/model_cache/tensorrt/ort",
+                        "trt_engine_cache_enable": True,
+                        "trt_engine_cache_path": "/config/model_cache/tensorrt/ort/trt-engines",
+                    }
+                )
+            elif provider == "OpenVINOExecutionProvider":
+                options.append({"cache_dir": "/config/model_cache/openvino/ort"})
+            else:
+                options.append({})
+
+        try:
+            if os.path.exists(path):
+                return ort.InferenceSession(
+                    path, providers=providers, provider_options=options
+                )
+            else:
+                raise FileNotFoundError(
+                    errno.ENOENT,
+                    os.strerror(errno.ENOENT),
+                    path,
+                )
+        except Exception:
+            s3_url = f"https://lakera-clip.s3.eu-west-1.amazonaws.com/{os.path.basename(path)}"
+            if not silent:
+                logging.info(
+                    f"The model file ({path}) doesn't exist "
+                    f"or it is invalid. Downloading it from the public S3 "
+                    f"bucket: {s3_url}."  # noqa: E501
+                )
+
+            # Download from S3
+            # Saving to a temporary file first to avoid corrupting the file
+            temporary_filename = Path(path).with_name(os.path.basename(path) + ".part")
+
+            # Create any missing directories in the path
+            temporary_filename.parent.mkdir(parents=True, exist_ok=True)
+
+            with requests.get(s3_url, stream=True) as r:
+                r.raise_for_status()
+                with open(temporary_filename, "wb") as f:
+                    for chunk in r.iter_content(chunk_size=8192):
+                        f.write(chunk)
+                    f.flush()
+
+            # Finally move the temporary file to the correct location
+            temporary_filename.rename(path)
+
+            return ort.InferenceSession(
+                path, providers=providers, provider_options=options
+            )
+

 class ClipEmbedding(EmbeddingFunction):
     """Embedding function for CLIP model used in Chroma."""