Embedding gpu (#14253)

Nicolas Mowen 2024-10-09 19:46:31 -06:00 committed by GitHub
parent 9fda259c0c
commit bc3a06178b
7 changed files with 34 additions and 33 deletions

docker/main/Dockerfile

@@ -180,9 +180,6 @@ RUN /build_pysqlite3.sh
 COPY docker/main/requirements-wheels.txt /requirements-wheels.txt
 RUN pip3 wheel --wheel-dir=/wheels -r /requirements-wheels.txt
 
-COPY docker/main/requirements-wheels-post.txt /requirements-wheels-post.txt
-RUN pip3 wheel --no-deps --wheel-dir=/wheels-post -r /requirements-wheels-post.txt
-
 # Collect deps in a single layer
 FROM scratch AS deps-rootfs
@@ -225,14 +222,6 @@ RUN --mount=type=bind,from=wheels,source=/wheels,target=/deps/wheels \
     python3 -m pip install --upgrade pip && \
     pip3 install -U /deps/wheels/*.whl
 
-# We have to uninstall this dependency specifically
-# as it will break onnxruntime-openvino
-RUN pip3 uninstall -y onnxruntime
-
-RUN --mount=type=bind,from=wheels,source=/wheels-post,target=/deps/wheels \
-    python3 -m pip install --upgrade pip && \
-    pip3 install -U /deps/wheels/*.whl
-
 COPY --from=deps-rootfs / /
 RUN ldconfig

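With the post-wheels stage removed, onnxruntime-openvino is installed together with the other wheels, so the explicit `pip3 uninstall -y onnxruntime` workaround is no longer needed. A minimal sketch (not part of this commit) for checking which execution providers the runtime in the final image actually exposes:

```python
# Sketch only: verify the installed ONNX Runtime build exposes the
# expected execution providers inside the final image.
import onnxruntime as ort

available = ort.get_available_providers()
print(available)  # e.g. ["OpenVINOExecutionProvider", "CPUExecutionProvider"] on x86_64

# CPUExecutionProvider is always present as a fallback.
assert "CPUExecutionProvider" in available
```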
docker/main/requirements-wheels-post.txt (deleted)

@@ -1,3 +0,0 @@
-# ONNX
-onnxruntime-openvino == 1.19.* ; platform_machine == 'x86_64'
-onnxruntime == 1.19.* ; platform_machine == 'aarch64'

docker/main/requirements-wheels.txt

@@ -30,11 +30,12 @@ norfair == 2.2.*
 setproctitle == 1.3.*
 ws4py == 0.5.*
 unidecode == 1.3.*
-# OpenVino (ONNX installed in wheels-post)
+# OpenVino & ONNX
 openvino == 2024.3.*
+onnxruntime-openvino == 1.19.* ; platform_machine == 'x86_64'
+onnxruntime == 1.19.* ; platform_machine == 'aarch64'
 # Embeddings
 transformers == 4.45.*
-onnx_clip == 4.0.*
 # Generative AI
 google-generativeai == 0.8.*
 ollama == 0.3.*

docs/docs/configuration/semantic_search.md

@@ -29,6 +29,12 @@ If you are enabling the Search feature for the first time, be advised that Frigate
 ### Jina AI CLIP
 
+:::tip
+
+The CLIP models are downloaded in ONNX format, which means they will be accelerated using GPU hardware when available. This depends on the Docker build that is used. See [the object detector docs](../configuration/object_detectors.md) for more information.
+
+:::
+
 The vision model is able to embed both images and text into the same vector space, which allows `image -> image` and `text -> image` similarity searches. Frigate uses this model on tracked objects to encode the thumbnail image and store it in the database. When searching for tracked objects via text in the search box, Frigate will perform a `text -> image` similarity search against this embedding. When clicking "Find Similar" in the tracked object detail pane, Frigate will perform an `image -> image` similarity search to retrieve the closest matching thumbnails.
 
 The text model is used to embed tracked object descriptions and perform searches against them. Descriptions can be created, viewed, and modified on the Search page when clicking on the gray tracked object chip at the top left of each review item. See [the Generative AI docs](/configuration/genai.md) for more information on how to automatically generate tracked object descriptions.
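To make the similarity searches above concrete: because thumbnails and text are embedded into the same vector space, either kind of query reduces to a nearest-neighbor lookup over stored vectors. A hedged illustration (not Frigate's actual search code; the 768-dimension size is assumed):

```python
# Illustration only: cosine similarity over a shared embedding space,
# the operation behind both text -> image and image -> image search.
import numpy as np

def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

query = np.random.rand(768)           # hypothetical text (or image) embedding
thumbnails = np.random.rand(10, 768)  # hypothetical stored thumbnail embeddings

scores = [cosine_similarity(query, t) for t in thumbnails]
best_match = int(np.argmax(scores))   # index of the closest thumbnail
```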

frigate/embeddings/embeddings.py

@@ -118,7 +118,7 @@ class Embeddings:
             },
             embedding_function=jina_text_embedding_function,
             model_type="text",
-            preferred_providers=["CPUExecutionProvider"],
+            force_cpu=True,
         )
 
         self.vision_embedding = GenericONNXEmbedding(
@@ -130,7 +130,6 @@ class Embeddings:
             },
             embedding_function=jina_vision_embedding_function,
             model_type="vision",
-            preferred_providers=["CPUExecutionProvider"],
         )
 
     def _create_tables(self):

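The caller-facing effect: raw ONNX provider lists no longer leak into Embeddings; callers only decide whether a model is pinned to the CPU. A hedged sketch of the new call shape (the model details below are hypothetical placeholders, the real values live in Embeddings.__init__):

```python
import numpy as np
from frigate.embeddings.functions.onnx import GenericONNXEmbedding

# Hypothetical arguments for illustration; only model_type and force_cpu
# are the point here.
text_embedding = GenericONNXEmbedding(
    model_name="jina-clip-v1",
    model_file="text_model_fp16.onnx",
    download_urls={"text_model_fp16.onnx": "https://example.invalid/model.onnx"},
    embedding_function=lambda inputs: np.stack(inputs),
    model_type="text",
    force_cpu=True,  # text model stays on the CPU provider
)
# Omitting force_cpu (the vision case) lets GPU providers be used when available.
```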
frigate/embeddings/functions/onnx.py

@@ -18,6 +18,7 @@ from transformers.utils.logging import disable_progress_bar
 from frigate.const import MODEL_CACHE_DIR, UPDATE_MODEL_STATE
 from frigate.types import ModelStatusTypesEnum
 from frigate.util.downloader import ModelDownloader
+from frigate.util.model import get_ort_providers
 
 warnings.filterwarnings(
     "ignore",
@@ -40,8 +41,8 @@ class GenericONNXEmbedding:
         download_urls: Dict[str, str],
         embedding_function: Callable[[List[np.ndarray]], np.ndarray],
         model_type: str,
-        preferred_providers: List[str] = ["CPUExecutionProvider"],
         tokenizer_file: Optional[str] = None,
+        force_cpu: bool = False,
     ):
         self.model_name = model_name
         self.model_file = model_file
@@ -49,7 +50,9 @@ class GenericONNXEmbedding:
         self.download_urls = download_urls
         self.embedding_function = embedding_function
         self.model_type = model_type  # 'text' or 'vision'
-        self.preferred_providers = preferred_providers
+        self.providers, self.provider_options = get_ort_providers(
+            force_cpu=force_cpu, requires_fp16=True
+        )
         self.download_path = os.path.join(MODEL_CACHE_DIR, self.model_name)
         self.tokenizer = None
@@ -105,8 +108,7 @@ class GenericONNXEmbedding:
         else:
             self.feature_extractor = self._load_feature_extractor()
         self.session = self._load_model(
-            os.path.join(self.download_path, self.model_file),
-            self.preferred_providers,
+            os.path.join(self.download_path, self.model_file)
         )
 
     def _load_tokenizer(self):
@@ -123,9 +125,11 @@ class GenericONNXEmbedding:
             f"{MODEL_CACHE_DIR}/{self.model_name}",
         )
 
-    def _load_model(self, path: str, providers: List[str]):
+    def _load_model(self, path: str):
         if os.path.exists(path):
-            return ort.InferenceSession(path, providers=providers)
+            return ort.InferenceSession(
+                path, providers=self.providers, provider_options=self.provider_options
+            )
         else:
             logger.warning(f"{self.model_name} model file {path} not found.")
             return None

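_load_model now hands both lists to ONNX Runtime, where providers[i] is configured by the dict at provider_options[i]. A standalone sketch of that pairing (the model path is hypothetical):

```python
# Sketch: the paired lists from get_ort_providers feed InferenceSession;
# providers[i] is configured by provider_options[i].
import onnxruntime as ort
from frigate.util.model import get_ort_providers

providers, provider_options = get_ort_providers(force_cpu=False, requires_fp16=True)
session = ort.InferenceSession(
    "/config/model_cache/jina-clip-v1/vision_model_fp16.onnx",  # hypothetical path
    providers=providers,
    provider_options=provider_options,
)
print(session.get_providers())  # providers the runtime actually selected
```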
frigate/util/model.py

@@ -6,7 +6,7 @@ import onnxruntime as ort
 
 
 def get_ort_providers(
-    force_cpu: bool = False, openvino_device: str = "AUTO"
+    force_cpu: bool = False, openvino_device: str = "AUTO", requires_fp16: bool = False
 ) -> tuple[list[str], list[dict[str, any]]]:
     if force_cpu:
         return (["CPUExecutionProvider"], [{}])
@@ -17,14 +17,19 @@ def get_ort_providers(
     for provider in providers:
         if provider == "TensorrtExecutionProvider":
             os.makedirs("/config/model_cache/tensorrt/ort/trt-engines", exist_ok=True)
-            options.append(
-                {
-                    "trt_timing_cache_enable": True,
-                    "trt_engine_cache_enable": True,
-                    "trt_timing_cache_path": "/config/model_cache/tensorrt/ort",
-                    "trt_engine_cache_path": "/config/model_cache/tensorrt/ort/trt-engines",
-                }
-            )
+
+            if not requires_fp16 or os.environ.get("USE_FP_16", "True") != "False":
+                options.append(
+                    {
+                        "trt_fp16_enable": requires_fp16,
+                        "trt_timing_cache_enable": True,
+                        "trt_engine_cache_enable": True,
+                        "trt_timing_cache_path": "/config/model_cache/tensorrt/ort",
+                        "trt_engine_cache_path": "/config/model_cache/tensorrt/ort/trt-engines",
+                    }
+                )
+            else:
+                options.append({})
         elif provider == "OpenVINOExecutionProvider":
             os.makedirs("/config/model_cache/openvino/ort", exist_ok=True)
             options.append(
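
Worth noting about the new gate: requires_fp16 enables trt_fp16_enable by default, and the USE_FP_16 environment variable acts as an opt-out; when it is set to "False", the TensorRT provider stays in the list but receives empty options, so neither FP16 nor the engine caches are configured for it. A small sketch of the behavior (assuming the function above):

```python
import os
from frigate.util.model import get_ort_providers

# Default path: FP16 TensorRT engines are enabled for models that request them.
providers, options = get_ort_providers(requires_fp16=True)

# Opt-out path: the provider list is unchanged, but TensorRT gets empty options.
os.environ["USE_FP_16"] = "False"
providers, options = get_ort_providers(requires_fp16=True)
```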