From 36d7eb7caaec6d920e130265a9e29d291ea2a3ad Mon Sep 17 00:00:00 2001
From: Nicolas Mowen
Date: Mon, 16 Sep 2024 18:18:11 -0600
Subject: [PATCH] Support ONNX model caching (#13780)

* Support model caching

* Cleanup
---
 docker/tensorrt/Dockerfile.amd64       |  5 ++++-
 docker/tensorrt/requirements-amd64.txt |  2 +-
 frigate/detectors/plugins/onnx.py      | 23 ++++++++++++++++++++++-
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/docker/tensorrt/Dockerfile.amd64 b/docker/tensorrt/Dockerfile.amd64
index b1ea1ced0..61d3264c9 100644
--- a/docker/tensorrt/Dockerfile.amd64
+++ b/docker/tensorrt/Dockerfile.amd64
@@ -3,6 +3,8 @@
 # https://askubuntu.com/questions/972516/debian-frontend-environment-variable
 ARG DEBIAN_FRONTEND=noninteractive
 
+ARG TRT_BASE=nvcr.io/nvidia/tensorrt:23.03-py3
+
 # Make this a separate target so it can be built/cached optionally
 FROM wheels as trt-wheels
 ARG DEBIAN_FRONTEND
@@ -13,7 +15,7 @@ COPY docker/tensorrt/requirements-amd64.txt /requirements-tensorrt.txt
 RUN mkdir -p /trt-wheels && pip3 wheel --wheel-dir=/trt-wheels -r /requirements-tensorrt.txt
 
 # Build CuDNN
-FROM tensorrt-base AS cudnn-deps
+FROM ${TRT_BASE} AS cudnn-deps
 
 ARG COMPUTE_LEVEL
 
@@ -31,6 +33,7 @@ ENV TRT_VER=8.5.3
 RUN --mount=type=bind,from=trt-wheels,source=/trt-wheels,target=/deps/trt-wheels \
     pip3 install -U /deps/trt-wheels/*.whl && \
     ldconfig
+COPY --from=cudnn-deps /usr/local/cuda-12.6 /usr/local/cuda
 
 ENV LD_LIBRARY_PATH=/usr/local/lib/python3.9/dist-packages/tensorrt:/usr/local/cuda/lib64:/usr/local/lib/python3.9/dist-packages/nvidia/cufft/lib
 WORKDIR /opt/frigate/
diff --git a/docker/tensorrt/requirements-amd64.txt b/docker/tensorrt/requirements-amd64.txt
index dd99190d0..b5ad4fcbd 100644
--- a/docker/tensorrt/requirements-amd64.txt
+++ b/docker/tensorrt/requirements-amd64.txt
@@ -7,7 +7,7 @@ cython == 0.29.*; platform_machine == 'x86_64'
 nvidia-cuda-runtime-cu12 == 12.1.*; platform_machine == 'x86_64'
 nvidia-cuda-runtime-cu11 == 11.8.*; platform_machine == 'x86_64'
 nvidia-cublas-cu11 == 11.11.3.6; platform_machine == 'x86_64'
-nvidia-cudnn-cu11 == 8.5.0.*; platform_machine == 'x86_64'
+nvidia-cudnn-cu11 == 8.6.0.*; platform_machine == 'x86_64'
 nvidia-cufft-cu11==10.*; platform_machine == 'x86_64'
 onnx==1.14.0; platform_machine == 'x86_64'
 onnxruntime-gpu==1.17.*; platform_machine == 'x86_64'
diff --git a/frigate/detectors/plugins/onnx.py b/frigate/detectors/plugins/onnx.py
index 1939b7323..ccd0ffc68 100644
--- a/frigate/detectors/plugins/onnx.py
+++ b/frigate/detectors/plugins/onnx.py
@@ -36,7 +36,28 @@ class ONNXDetector(DetectionApi):
 
         path = detector_config.model.path
         logger.info(f"ONNX: loading {detector_config.model.path}")
-        self.model = ort.InferenceSession(path, providers=ort.get_available_providers())
+
+        providers = ort.get_available_providers()
+        options = []
+
+        for provider in providers:
+            if provider == "TensorrtExecutionProvider":
+                options.append(
+                    {
+                        "trt_timing_cache_enable": True,
+                        "trt_timing_cache_path": "/config/model_cache/tensorrt/ort",
+                        "trt_engine_cache_enable": True,
+                        "trt_engine_cache_path": "/config/model_cache/tensorrt/ort/trt-engines",
+                    }
+                )
+            elif provider == "OpenVINOExecutionProvider":
+                options.append({"cache_dir": "/config/model_cache/openvino/ort"})
+            else:
+                options.append({})
+
+        self.model = ort.InferenceSession(
+            path, providers=providers, provider_options=options
+        )
 
         self.h = detector_config.model.height
         self.w = detector_config.model.width
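
Note on the onnx.py change: ort.InferenceSession takes a provider_options
list that must stay parallel to the providers list, which is why providers
with no cache settings still get an empty dict appended. The following is a
standalone sketch of the same pattern outside Frigate; "model.onnx" and the
local ./model_cache paths are illustrative placeholders, not part of the
patch:

    # Minimal sketch: requires onnxruntime (onnxruntime-gpu for the
    # TensorRT provider) and an ONNX model file; paths are hypothetical.
    import onnxruntime as ort

    MODEL_PATH = "model.onnx"

    providers = ort.get_available_providers()
    options = []

    for provider in providers:
        if provider == "TensorrtExecutionProvider":
            # Persist TensorRT's timing cache and compiled engines so a
            # later process start can skip the slow engine build.
            options.append(
                {
                    "trt_timing_cache_enable": True,
                    "trt_timing_cache_path": "./model_cache/tensorrt/ort",
                    "trt_engine_cache_enable": True,
                    "trt_engine_cache_path": "./model_cache/tensorrt/ort/trt-engines",
                }
            )
        elif provider == "OpenVINOExecutionProvider":
            # OpenVINO caches compiled model blobs under cache_dir.
            options.append({"cache_dir": "./model_cache/openvino/ort"})
        else:
            # No caching knobs for this provider; keep the lists aligned.
            options.append({})

    session = ort.InferenceSession(
        MODEL_PATH, providers=providers, provider_options=options
    )
    print("Active providers:", session.get_providers())

Both cache locations in the patch sit under /config/model_cache, the same
directory Frigate already uses for generated TensorRT models, so the caches
survive container restarts as long as /config is on a persistent volume.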
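
A rough way to verify the engine cache afterwards is to time two consecutive
session constructions against the same cache directory; once the serialized
engines exist, the second start should be markedly faster. This is an
informal sketch with the same placeholder model and paths as above. One
caveat: for models with dynamic input shapes, TensorRT may defer the engine
build to the first inference rather than session construction, in which case
the first inference is what should be timed:

    # Informal cold/warm comparison; assumes a TensorRT-capable
    # onnxruntime-gpu install and "model.onnx" in the working directory.
    import time

    import onnxruntime as ort

    providers = ["TensorrtExecutionProvider", "CPUExecutionProvider"]
    options = [
        {
            "trt_engine_cache_enable": True,
            "trt_engine_cache_path": "./model_cache/tensorrt/ort/trt-engines",
        },
        {},  # CPU EP has no cache settings; keeps the lists parallel
    ]

    for label in ("first (cold) start", "second (warm) start"):
        start = time.monotonic()
        ort.InferenceSession(
            "model.onnx", providers=providers, provider_options=options
        )
        print(f"{label}: {time.monotonic() - start:.1f}s")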