"""CLIP Embeddings for Frigate."""

import errno
import logging
import os
from pathlib import Path
from typing import Tuple, Union

import onnxruntime as ort
import requests
from chromadb import EmbeddingFunction, Embeddings
from chromadb.api.types import (
    Documents,
    Images,
    is_document,
    is_image,
)
from onnx_clip import OnnxClip

from frigate.const import MODEL_CACHE_DIR


class Clip(OnnxClip):
    """Override load models to download to cache directory."""

    @staticmethod
    def _load_models(
        model: str,
        silent: bool,
    ) -> Tuple[ort.InferenceSession, ort.InferenceSession]:
        """
        These models are a part of the container. Treat them as such.
        """
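        # Map the requested CLIP variant to the ONNX files expected in the model cache.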
        if model == "ViT-B/32":
            IMAGE_MODEL_FILE = "clip_image_model_vitb32.onnx"
            TEXT_MODEL_FILE = "clip_text_model_vitb32.onnx"
        elif model == "RN50":
            IMAGE_MODEL_FILE = "clip_image_model_rn50.onnx"
            TEXT_MODEL_FILE = "clip_text_model_rn50.onnx"
        else:
            raise ValueError(f"Unexpected model {model}. No `.onnx` file found.")

        models = []
        for model_file in [IMAGE_MODEL_FILE, TEXT_MODEL_FILE]:
            path = os.path.join(MODEL_CACHE_DIR, "clip", model_file)
            models.append(Clip._load_model(path, silent))

        return models[0], models[1]

    @staticmethod
    def _load_model(path: str, silent: bool):
        providers = []
        options = []

        for provider in ort.get_available_providers():
            if provider == "TensorrtExecutionProvider":
                continue
            elif provider == "OpenVINOExecutionProvider":
                # TODO need to verify openvino works correctly
                os.makedirs("/config/model_cache/openvino/ort", exist_ok=True)
                providers.append(provider)
                options.append(
                    {
                        "cache_dir": "/config/model_cache/openvino/ort",
                        "device_type": "GPU",
                    }
                )
            else:
                providers.append(provider)
                options.append({})
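
        # Try the local cache first; any failure (missing or unreadable file)
        # falls through to the S3 download in the except branch below.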
        try:
            if os.path.exists(path):
                return ort.InferenceSession(
                    path, providers=providers, provider_options=options
                )
            else:
                raise FileNotFoundError(
                    errno.ENOENT,
                    os.strerror(errno.ENOENT),
                    path,
                )
        except Exception:
            s3_url = f"https://lakera-clip.s3.eu-west-1.amazonaws.com/{os.path.basename(path)}"
            if not silent:
                logging.info(
                    f"The model file ({path}) doesn't exist "
                    f"or it is invalid. Downloading it from the public S3 "
                    f"bucket: {s3_url}."  # noqa: E501
                )

            # Download from S3
            # Saving to a temporary file first to avoid corrupting the file
            temporary_filename = Path(path).with_name(os.path.basename(path) + ".part")

            # Create any missing directories in the path
            temporary_filename.parent.mkdir(parents=True, exist_ok=True)

            with requests.get(s3_url, stream=True) as r:
                r.raise_for_status()
                with open(temporary_filename, "wb") as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)
                    f.flush()

            # Finally move the temporary file to the correct location
            temporary_filename.rename(path)
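
            # The file is now fully downloaded and in place; build the session
            # with the same provider setup as the cache-hit path above.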
            return ort.InferenceSession(
                path, providers=providers, provider_options=options
            )


class ClipEmbedding(EmbeddingFunction):
    """Embedding function for the CLIP model used in Chroma."""

    def __init__(self, model: str = "ViT-B/32"):
        """Initialize CLIP Embedding function."""
        self.model = Clip(model)

    def __call__(self, input: Union[Documents, Images]) -> Embeddings:
        """Embed a mixed list of documents and images."""
        embeddings: Embeddings = []
        for item in input:
            if is_image(item):
                result = self.model.get_image_embeddings([item])
                embeddings.append(result[0, :].tolist())
            elif is_document(item):
                result = self.model.get_text_embeddings([item])
                embeddings.append(result[0, :].tolist())
        return embeddings
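

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the Frigate runtime). It illustrates how
# ClipEmbedding could be wired into a Chroma collection; the collection name
# and sample documents are hypothetical, and it assumes the CLIP ONNX files
# are already present in MODEL_CACHE_DIR (or reachable via the S3 fallback
# implemented above).
if __name__ == "__main__":
    import chromadb

    client = chromadb.Client()  # ephemeral, in-memory client
    collection = client.create_collection(
        name="clip_demo",  # hypothetical collection name
        embedding_function=ClipEmbedding(),
    )
    collection.add(
        ids=["1", "2"],
        documents=["a photo of a dog", "a photo of a red car"],
    )
    print(collection.query(query_texts=["dog"], n_results=1))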