diff --git a/docker/main/Dockerfile b/docker/main/Dockerfile
index 128159d5f..ac4d277bd 100644
--- a/docker/main/Dockerfile
+++ b/docker/main/Dockerfile
@@ -180,9 +180,6 @@ RUN /build_pysqlite3.sh
 COPY docker/main/requirements-wheels.txt /requirements-wheels.txt
 RUN pip3 wheel --wheel-dir=/wheels -r /requirements-wheels.txt
 
-COPY docker/main/requirements-wheels-post.txt /requirements-wheels-post.txt
-RUN pip3 wheel --no-deps --wheel-dir=/wheels-post -r /requirements-wheels-post.txt
-
 # Collect deps in a single layer
 FROM scratch AS deps-rootfs
 
@@ -225,14 +222,6 @@ RUN --mount=type=bind,from=wheels,source=/wheels,target=/deps/wheels \
     python3 -m pip install --upgrade pip && \
     pip3 install -U /deps/wheels/*.whl
 
-# We have to uninstall this dependency specifically
-# as it will break onnxruntime-openvino
-RUN pip3 uninstall -y onnxruntime
-
-RUN --mount=type=bind,from=wheels,source=/wheels-post,target=/deps/wheels \
-    python3 -m pip install --upgrade pip && \
-    pip3 install -U /deps/wheels/*.whl
-
 COPY --from=deps-rootfs / /
 
 RUN ldconfig
diff --git a/docker/main/requirements-wheels-post.txt b/docker/main/requirements-wheels-post.txt
deleted file mode 100644
index a1686f091..000000000
--- a/docker/main/requirements-wheels-post.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-# ONNX
-onnxruntime-openvino == 1.19.* ; platform_machine == 'x86_64'
-onnxruntime == 1.19.* ; platform_machine == 'aarch64'
\ No newline at end of file
diff --git a/docker/main/requirements-wheels.txt b/docker/main/requirements-wheels.txt
index 5db2e8886..11ad94f3f 100644
--- a/docker/main/requirements-wheels.txt
+++ b/docker/main/requirements-wheels.txt
@@ -30,11 +30,12 @@ norfair == 2.2.*
 setproctitle == 1.3.*
 ws4py == 0.5.*
 unidecode == 1.3.*
-# OpenVino (ONNX installed in wheels-post)
+# OpenVino & ONNX
 openvino == 2024.3.*
+onnxruntime-openvino == 1.19.* ; platform_machine == 'x86_64'
+onnxruntime == 1.19.* ; platform_machine == 'aarch64'
 # Embeddings
 transformers == 4.45.*
-onnx_clip == 4.0.*
 # Generative AI
 google-generativeai == 0.8.*
 ollama == 0.3.*
diff --git a/docs/docs/configuration/semantic_search.md b/docs/docs/configuration/semantic_search.md
index 8e9c4abc2..7cb8ca769 100644
--- a/docs/docs/configuration/semantic_search.md
+++ b/docs/docs/configuration/semantic_search.md
@@ -29,6 +29,12 @@ If you are enabling the Search feature for the first time, be advised that Friga
 
 ### Jina AI CLIP
 
+:::tip
+
+The CLIP models are downloaded in ONNX format, which means they will be accelerated using GPU hardware when available. This depends on the Docker build that is used. See [the object detector docs](../configuration/object_detectors.md) for more information.
+
+:::
+
 The vision model is able to embed both images and text into the same vector space, which allows `image -> image` and `text -> image` similarity searches. Frigate uses this model on tracked objects to encode the thumbnail image and store it in the database. When searching for tracked objects via text in the search box, Frigate will perform a `text -> image` similarity search against this embedding. When clicking "Find Similar" in the tracked object detail pane, Frigate will perform an `image -> image` similarity search to retrieve the closest matching thumbnails.
 
 The text model is used to embed tracked object descriptions and perform searches against them. Descriptions can be created, viewed, and modified on the Search page when clicking on the gray tracked object chip at the top left of each review item. See [the Generative AI docs](/configuration/genai.md) for more information on how to automatically generate tracked object descriptions.
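Since `onnxruntime-openvino` and `onnxruntime` now ship in the main wheels list, the execution providers available at runtime depend on which wheel the platform pulled in. A quick way to verify this inside the container (an illustrative sketch, not part of this diff):

```python
import onnxruntime as ort

# The installed wheel determines what is exposed here:
# onnxruntime-openvino on x86_64 should list OpenVINOExecutionProvider,
# while the plain onnxruntime wheel on aarch64 typically lists only
# CPUExecutionProvider.
print(ort.get_available_providers())
```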
diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py
index 99a2d8ab0..35d76dece 100644
--- a/frigate/embeddings/embeddings.py
+++ b/frigate/embeddings/embeddings.py
@@ -118,7 +118,7 @@ class Embeddings:
             },
             embedding_function=jina_text_embedding_function,
             model_type="text",
-            preferred_providers=["CPUExecutionProvider"],
+            force_cpu=True,
         )
 
         self.vision_embedding = GenericONNXEmbedding(
@@ -130,7 +130,6 @@
             },
             embedding_function=jina_vision_embedding_function,
             model_type="vision",
-            preferred_providers=["CPUExecutionProvider"],
         )
 
     def _create_tables(self):
diff --git a/frigate/embeddings/functions/onnx.py b/frigate/embeddings/functions/onnx.py
index 396145380..b5f15f391 100644
--- a/frigate/embeddings/functions/onnx.py
+++ b/frigate/embeddings/functions/onnx.py
@@ -18,6 +18,7 @@ from transformers.utils.logging import disable_progress_bar
 from frigate.const import MODEL_CACHE_DIR, UPDATE_MODEL_STATE
 from frigate.types import ModelStatusTypesEnum
 from frigate.util.downloader import ModelDownloader
+from frigate.util.model import get_ort_providers
 
 warnings.filterwarnings(
     "ignore",
@@ -40,8 +41,8 @@ class GenericONNXEmbedding:
         download_urls: Dict[str, str],
         embedding_function: Callable[[List[np.ndarray]], np.ndarray],
         model_type: str,
-        preferred_providers: List[str] = ["CPUExecutionProvider"],
         tokenizer_file: Optional[str] = None,
+        force_cpu: bool = False,
     ):
         self.model_name = model_name
         self.model_file = model_file
@@ -49,7 +50,9 @@
         self.download_urls = download_urls
         self.embedding_function = embedding_function
         self.model_type = model_type  # 'text' or 'vision'
-        self.preferred_providers = preferred_providers
+        self.providers, self.provider_options = get_ort_providers(
+            force_cpu=force_cpu, requires_fp16=True
+        )
 
         self.download_path = os.path.join(MODEL_CACHE_DIR, self.model_name)
         self.tokenizer = None
@@ -105,8 +108,7 @@
         else:
             self.feature_extractor = self._load_feature_extractor()
         self.session = self._load_model(
-            os.path.join(self.download_path, self.model_file),
-            self.preferred_providers,
+            os.path.join(self.download_path, self.model_file)
         )
 
     def _load_tokenizer(self):
@@ -123,9 +125,11 @@
             f"{MODEL_CACHE_DIR}/{self.model_name}",
         )
 
-    def _load_model(self, path: str, providers: List[str]):
+    def _load_model(self, path: str):
         if os.path.exists(path):
-            return ort.InferenceSession(path, providers=providers)
+            return ort.InferenceSession(
+                path, providers=self.providers, provider_options=self.provider_options
+            )
         else:
             logger.warning(f"{self.model_name} model file {path} not found.")
             return None
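For context on the wiring above: `GenericONNXEmbedding` now resolves its providers once in the constructor, and `_load_model` forwards the `(providers, provider_options)` pair to `ort.InferenceSession`. A minimal usage sketch (the model path here is hypothetical):

```python
import onnxruntime as ort

from frigate.util.model import get_ort_providers

# force_cpu=True (used for the text model) pins inference to
# CPUExecutionProvider; the vision model leaves it False so GPU
# providers are preferred when present.
providers, provider_options = get_ort_providers(force_cpu=False, requires_fp16=True)

session = ort.InferenceSession(
    "/config/model_cache/jinaai/jina-clip-v1/vision_model.onnx",  # hypothetical path
    providers=providers,
    provider_options=provider_options,
)
print(session.get_providers())
```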
"trt_timing_cache_path": "/config/model_cache/tensorrt/ort", - "trt_engine_cache_path": "/config/model_cache/tensorrt/ort/trt-engines", - } - ) + + if not requires_fp16 or os.environ.get("USE_FP_16", "True") != "False": + options.append( + { + "trt_fp16_enable": requires_fp16, + "trt_timing_cache_enable": True, + "trt_engine_cache_enable": True, + "trt_timing_cache_path": "/config/model_cache/tensorrt/ort", + "trt_engine_cache_path": "/config/model_cache/tensorrt/ort/trt-engines", + } + ) + else: + options.append({}) elif provider == "OpenVINOExecutionProvider": os.makedirs("/config/model_cache/openvino/ort", exist_ok=True) options.append(