diff --git a/Dockerfile b/Dockerfile
index d1dbd0755..660cb5b25 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -262,15 +262,34 @@ FROM deps AS frigate
 WORKDIR /opt/frigate/
 COPY --from=rootfs / /
 
+# Build TensorRT-specific library
+FROM nvcr.io/nvidia/tensorrt:23.03-py3 AS trt-deps
+
+RUN --mount=type=bind,source=docker/support/tensorrt_detector/tensorrt_libyolo.sh,target=/tensorrt_libyolo.sh \
+    /tensorrt_libyolo.sh
+
 # Frigate w/ TensorRT Support as separate image
 FROM frigate AS frigate-tensorrt
+
+# Disable S6 global timeout
+ENV S6_CMD_WAIT_FOR_SERVICES_MAXTIME=0
+
+ENV TRT_VER=8.5.3
+ENV YOLO_MODELS="yolov7-tiny-416"
+
+COPY --from=trt-deps /usr/local/lib/libyolo_layer.so /usr/local/lib/libyolo_layer.so
+COPY --from=trt-deps /usr/local/src/tensorrt_demos /usr/local/src/tensorrt_demos
+COPY docker/support/tensorrt_detector/rootfs/ /
+
 RUN --mount=type=bind,from=trt-wheels,source=/trt-wheels,target=/deps/trt-wheels \
     pip3 install -U /deps/trt-wheels/*.whl && \
-    ln -s libnvrtc.so.11.2 /usr/local/lib/python3.9/dist-packages/nvidia/cuda_nvrtc/lib/libnvrtc.so && \
     ldconfig
 
 # Dev Container w/ TRT
 FROM devcontainer AS devcontainer-trt
 
+COPY --from=trt-deps /usr/local/lib/libyolo_layer.so /usr/local/lib/libyolo_layer.so
+COPY --from=trt-deps /usr/local/src/tensorrt_demos /usr/local/src/tensorrt_demos
+COPY docker/support/tensorrt_detector/rootfs/ /
 RUN --mount=type=bind,from=trt-wheels,source=/trt-wheels,target=/deps/trt-wheels \
     pip3 install -U /deps/trt-wheels/*.whl
diff --git a/docker/install_deps.sh b/docker/install_deps.sh
index 25b6951b5..7d5242d83 100755
--- a/docker/install_deps.sh
+++ b/docker/install_deps.sh
@@ -68,7 +68,7 @@ if [[ "${TARGETARCH}" == "arm64" ]]; then
         libva-drm2 mesa-va-drivers
 fi
 
-apt-get purge gnupg apt-transport-https wget xz-utils -y
+apt-get purge gnupg apt-transport-https xz-utils -y
 apt-get clean autoclean -y
 apt-get autoremove --purge -y
 rm -rf /var/lib/apt/lists/*
diff --git a/docker/rootfs/etc/ld.so.conf.d/cuda_tensorrt.conf b/docker/support/tensorrt_detector/rootfs/etc/ld.so.conf.d/cuda_tensorrt.conf
similarity index 94%
rename from docker/rootfs/etc/ld.so.conf.d/cuda_tensorrt.conf
rename to docker/support/tensorrt_detector/rootfs/etc/ld.so.conf.d/cuda_tensorrt.conf
index d4248d047..fe16ed9c5 100644
--- a/docker/rootfs/etc/ld.so.conf.d/cuda_tensorrt.conf
+++ b/docker/support/tensorrt_detector/rootfs/etc/ld.so.conf.d/cuda_tensorrt.conf
@@ -1,3 +1,4 @@
+/usr/local/lib
 /usr/local/lib/python3.9/dist-packages/nvidia/cudnn/lib
 /usr/local/lib/python3.9/dist-packages/nvidia/cuda_runtime/lib
 /usr/local/lib/python3.9/dist-packages/nvidia/cublas/lib
diff --git a/docker/support/tensorrt_detector/rootfs/etc/s6-overlay/s6-rc.d/frigate/dependencies.d/trt-model-prepare b/docker/support/tensorrt_detector/rootfs/etc/s6-overlay/s6-rc.d/frigate/dependencies.d/trt-model-prepare
new file mode 100644
index 000000000..e69de29bb
diff --git a/docker/support/tensorrt_detector/rootfs/etc/s6-overlay/s6-rc.d/trt-model-prepare/dependencies.d/base b/docker/support/tensorrt_detector/rootfs/etc/s6-overlay/s6-rc.d/trt-model-prepare/dependencies.d/base
new file mode 100644
index 000000000..e69de29bb
diff --git a/docker/support/tensorrt_detector/rootfs/etc/s6-overlay/s6-rc.d/trt-model-prepare/run b/docker/support/tensorrt_detector/rootfs/etc/s6-overlay/s6-rc.d/trt-model-prepare/run
new file mode 100755
index 000000000..5f0e43553
--- /dev/null
+++ b/docker/support/tensorrt_detector/rootfs/etc/s6-overlay/s6-rc.d/trt-model-prepare/run
@@ -0,0 +1,53 @@
+#!/command/with-contenv bash
+# shellcheck shell=bash
+# Generate models for the TensorRT detector
+
+set -o errexit -o nounset -o pipefail
+
+MODEL_CACHE_DIR=${MODEL_CACHE_DIR:-"/config/model_cache/tensorrt"}
+OUTPUT_FOLDER="${MODEL_CACHE_DIR}/${TRT_VER}"
+
+# Create output folder
+mkdir -p ${OUTPUT_FOLDER}
+
+FIRST_MODEL=true
+MODEL_CONVERT=""
+
+for model in ${YOLO_MODELS//,/ }
+do
+    # Remove old link in case path/version changed
+    rm -f ${MODEL_CACHE_DIR}/${model}.trt
+
+    if [[ ! -f ${OUTPUT_FOLDER}/${model}.trt ]]; then
+        if [[ ${FIRST_MODEL} = true ]]; then
+            MODEL_CONVERT="${model}"
+            FIRST_MODEL=false;
+        else
+            MODEL_CONVERT+=",${model}";
+        fi
+    else
+        ln -s ${OUTPUT_FOLDER}/${model}.trt ${MODEL_CACHE_DIR}/${model}.trt
+    fi
+done
+
+if [[ -z ${MODEL_CONVERT} ]]; then
+    echo "No models to convert."
+    exit 0
+fi
+
+echo "Generating the following TRT Models: ${MODEL_CONVERT}"
+
+# Build trt engine
+cd /usr/local/src/tensorrt_demos/yolo
+
+# Download yolo weights
+./download_yolo.sh $MODEL_CONVERT > /dev/null
+
+for model in ${MODEL_CONVERT//,/ }
+do
+    echo "Converting ${model} model"
+    python3 yolo_to_onnx.py -m ${model} > /dev/null
+    python3 onnx_to_tensorrt.py -m ${model} > /dev/null
+    cp ${model}.trt ${OUTPUT_FOLDER}/${model}.trt
+    ln -s ${OUTPUT_FOLDER}/${model}.trt ${MODEL_CACHE_DIR}/${model}.trt
+done
diff --git a/docker/support/tensorrt_detector/rootfs/etc/s6-overlay/s6-rc.d/trt-model-prepare/type b/docker/support/tensorrt_detector/rootfs/etc/s6-overlay/s6-rc.d/trt-model-prepare/type
new file mode 100644
index 000000000..bdd22a185
--- /dev/null
+++ b/docker/support/tensorrt_detector/rootfs/etc/s6-overlay/s6-rc.d/trt-model-prepare/type
@@ -0,0 +1 @@
+oneshot
diff --git a/docker/support/tensorrt_detector/rootfs/etc/s6-overlay/s6-rc.d/trt-model-prepare/up b/docker/support/tensorrt_detector/rootfs/etc/s6-overlay/s6-rc.d/trt-model-prepare/up
new file mode 100644
index 000000000..b9de40ad0
--- /dev/null
+++ b/docker/support/tensorrt_detector/rootfs/etc/s6-overlay/s6-rc.d/trt-model-prepare/up
@@ -0,0 +1 @@
+/etc/s6-overlay/s6-rc.d/trt-model-prepare/run
diff --git a/docker/support/tensorrt_detector/tensorrt_libyolo.sh b/docker/support/tensorrt_detector/tensorrt_libyolo.sh
new file mode 100755
index 000000000..e6fc415e5
--- /dev/null
+++ b/docker/support/tensorrt_detector/tensorrt_libyolo.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+set -euxo pipefail
+
+SCRIPT_DIR="/usr/local/src/tensorrt_demos"
+
+# Clone tensorrt_demos repo
+git clone --depth 1 https://github.com/NateMeyer/tensorrt_demos.git -b conditional_download
+
+# Build libyolo
+cd ./tensorrt_demos/plugins && make all
+cp libyolo_layer.so /usr/local/lib/libyolo_layer.so
+
+# Store yolo scripts for later conversion
+cd ../
+mkdir -p ${SCRIPT_DIR}/plugins
+cp plugins/libyolo_layer.so ${SCRIPT_DIR}/plugins/libyolo_layer.so
+cp -a yolo ${SCRIPT_DIR}/
diff --git a/docker/tensorrt_models.sh b/docker/tensorrt_models.sh
deleted file mode 100755
index 957e817d6..000000000
--- a/docker/tensorrt_models.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/bin/bash
-
-set -euxo pipefail
-
-CUDA_HOME=/usr/local/cuda
-LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64
-OUTPUT_FOLDER=/tensorrt_models
-echo "Generating the following TRT Models: ${YOLO_MODELS:="yolov4-tiny-288,yolov4-tiny-416,yolov7-tiny-416"}"
-
-# Create output folder
-mkdir -p ${OUTPUT_FOLDER}
-
-# Install packages
-pip install --upgrade pip && pip install onnx==1.9.0 protobuf==3.20.3
-
-# Clone tensorrt_demos repo
-git clone --depth 1 https://github.com/yeahme49/tensorrt_demos.git /tensorrt_demos
-
-# Build libyolo
-cd /tensorrt_demos/plugins && make all
-cp libyolo_layer.so ${OUTPUT_FOLDER}/libyolo_layer.so
-
-# Download yolo weights
-cd /tensorrt_demos/yolo && ./download_yolo.sh
-
-# Build trt engine
-cd /tensorrt_demos/yolo
-
-for model in ${YOLO_MODELS//,/ }
-do
-    python3 yolo_to_onnx.py -m ${model}
-    python3 onnx_to_tensorrt.py -m ${model}
-    cp /tensorrt_demos/yolo/${model}.trt ${OUTPUT_FOLDER}/${model}.trt;
-done
diff --git a/docs/docs/configuration/object_detectors.md b/docs/docs/configuration/object_detectors.md
index 3f48423bc..d684a2917 100644
--- a/docs/docs/configuration/object_detectors.md
+++ b/docs/docs/configuration/object_detectors.md
@@ -174,9 +174,7 @@ NVidia GPUs may be used for object detection using the TensorRT libraries. Due t
 
 ### Minimum Hardware Support
 
-The TensorRT detector uses the 11.x series of CUDA libraries which have minor version compatibility. The minimum driver version on the host system must be `>=450.80.02`. Also the GPU must support a Compute Capability of `5.0` or greater. This generally correlates to a Maxwell-era GPU or newer, check the NVIDIA GPU Compute Capability table linked below.
-
-> **TODO:** NVidia claims support on compute 3.5 and 3.7, but marks it as deprecated. This would have some, but not all, Kepler GPUs as possibly working. This needs testing before making any claims of support.
+The TensorRT detector uses the 12.x series of CUDA libraries which have minor version compatibility. The minimum driver version on the host system must be `>=525.60.13`. Also the GPU must support a Compute Capability of `5.0` or greater. This generally correlates to a Maxwell-era GPU or newer, check the NVIDIA GPU Compute Capability table linked below.
 
 To use the TensorRT detector, make sure your host system has the [nvidia-container-runtime](https://docs.docker.com/config/containers/resource_constraints/#access-an-nvidia-gpu) installed to pass through the GPU to the container and the host system has a compatible driver installed for your GPU.
 
@@ -192,22 +190,15 @@ There are improved capabilities in newer GPU architectures that TensorRT can ben
 
 ### Generate Models
 
-The model used for TensorRT must be preprocessed on the same hardware platform that they will run on. This means that each user must run additional setup to generate a model file for the TensorRT library. A script is provided that will build several common models.
+The models used for TensorRT must be preprocessed on the same hardware platform that they will run on. This means that each user must run additional setup to generate a model file for the TensorRT library. A script is included that will build several common models.
 
-To generate model files, create a new folder to save the models, download the script, and launch a docker container that will run the script.
+The Frigate image will generate model files during startup if the specified model is not found. Processed models are stored in the `/config/model_cache` folder. Typically the `/config` path is already mapped to a directory on the host, so the `model_cache` does not need to be mapped separately unless the user wants to store it in a different location on the host.
 
-```bash
-mkdir trt-models
-wget https://github.com/blakeblackshear/frigate/raw/master/docker/tensorrt_models.sh
-chmod +x tensorrt_models.sh
-docker run --gpus=all --rm -it -v `pwd`/trt-models:/tensorrt_models -v `pwd`/tensorrt_models.sh:/tensorrt_models.sh nvcr.io/nvidia/tensorrt:22.07-py3 /tensorrt_models.sh
-```
+By default, the `yolov7-tiny-416` model will be generated, but this can be overridden by specifying the `YOLO_MODELS` environment variable in Docker. One or more models may be listed in a comma-separated format, and each one will be generated. To disable model generation entirely, set the variable to an empty string, `YOLO_MODELS=""`. Models will only be generated if the corresponding `{model}.trt` file is not present in the `model_cache` folder, so you can force a model to be regenerated by deleting it from your Frigate data folder.
 
-The `trt-models` folder can then be mapped into your Frigate container as `trt-models` and the models referenced from the config.
+If your GPU does not support FP16 operations, you can pass the environment variable `USE_FP16=False` to disable it.
 
-If your GPU does not support FP16 operations, you can pass the environment variable `-e USE_FP16=False` to the `docker run` command to disable it.
-
-Specific models can be selected by passing an environment variable to the `docker run` command. Use the form `-e YOLO_MODELS=yolov4-416,yolov4-tiny-416` to select one or more model names. The models available are shown below.
+Specific models can be selected by passing the `YOLO_MODELS` environment variable to the `docker run` command or by setting it in your `docker-compose.yml` file. Use the form `-e YOLO_MODELS=yolov4-416,yolov4-tiny-416` to select one or more model names. The models available are shown below.
 
 ```
 yolov3-288
@@ -237,11 +228,20 @@ yolov7x-640
 yolov7x-320
 ```
 
+An example `docker-compose.yml` fragment that converts the `yolov4-608` and `yolov7x-640` models for a Pascal card would look something like this:
+
+```yml
+frigate:
+  environment:
+    - YOLO_MODELS="yolov4-608,yolov7x-640"
+    - USE_FP16=false
+```
+
 ### Configuration Parameters
 
 The TensorRT detector can be selected by specifying `tensorrt` as the model type. The GPU will need to be passed through to the docker container using the same methods described in the [Hardware Acceleration](hardware_acceleration.md#nvidia-gpu) section. If you pass through multiple GPUs, you can select which GPU is used for a detector with the `device` configuration parameter. The `device` parameter is an integer value of the GPU index, as shown by `nvidia-smi` within the container.
 
-The TensorRT detector uses `.trt` model files that are located in `/trt-models/` by default. These model file path and dimensions used will depend on which model you have generated.
+The TensorRT detector uses `.trt` model files that are located in `/config/model_cache/tensorrt` by default. The model path and dimensions used will depend on which model you have generated.
 
 ```yaml
 detectors:
@@ -250,7 +250,7 @@ detectors:
     device: 0 #This is the default, select the first GPU
 
 model:
-  path: /trt-models/yolov7-tiny-416.trt
+  path: /config/model_cache/tensorrt/yolov7-tiny-416.trt
   input_tensor: nchw
   input_pixel_format: rgb
   width: 416
diff --git a/docs/docs/frigate/hardware.md b/docs/docs/frigate/hardware.md
index 36233ea68..5daf8fe3b 100644
--- a/docs/docs/frigate/hardware.md
+++ b/docs/docs/frigate/hardware.md
@@ -72,7 +72,7 @@ Inference speeds vary greatly depending on the CPU, GPU, or VPU used, some known
 
 ### TensorRT
 
-The TensortRT detector is able to run on x86 hosts that have an Nvidia GPU which supports the 11.x series of CUDA libraries. The minimum driver version on the host system must be `>=450.80.02`. Also the GPU must support a Compute Capability of `5.0` or greater. This generally correlates to a Maxwell-era GPU or newer, check the [TensorRT docs for more info](/configuration/object_detectors#nvidia-tensorrt-detector).
+The TensorRT detector is able to run on x86 hosts that have an Nvidia GPU which supports the 12.x series of CUDA libraries. The minimum driver version on the host system must be `>=525.60.13`. Also the GPU must support a Compute Capability of `5.0` or greater. This generally correlates to a Maxwell-era GPU or newer, check the [TensorRT docs for more info](/configuration/object_detectors#nvidia-tensorrt-detector).
 
 Inference speeds will vary greatly depending on the GPU and the model used. `tiny` variants are faster than the equivalent non-tiny model, some known examples are below:
 
diff --git a/frigate/detectors/plugins/tensorrt.py b/frigate/detectors/plugins/tensorrt.py
index 7251b8751..dea3fe078 100644
--- a/frigate/detectors/plugins/tensorrt.py
+++ b/frigate/detectors/plugins/tensorrt.py
@@ -78,7 +78,7 @@ class TensorRtDetector(DetectionApi):
 
         try:
             trt.init_libnvinfer_plugins(self.trt_logger, "")
-            ctypes.cdll.LoadLibrary("/trt-models/libyolo_layer.so")
+            ctypes.cdll.LoadLibrary("/usr/local/lib/libyolo_layer.so")
         except OSError as e:
             logger.error(
                 "ERROR: failed to load libraries. %s",
diff --git a/requirements-tensorrt.txt b/requirements-tensorrt.txt
index 90517babd..214202e43 100644
--- a/requirements-tensorrt.txt
+++ b/requirements-tensorrt.txt
@@ -1,9 +1,12 @@
 # NVidia TensorRT Support (amd64 only)
-nvidia-pyindex; platform_machine == 'x86_64'
-nvidia-tensorrt == 8.4.1.5; platform_machine == 'x86_64'
-cuda-python == 11.7; platform_machine == 'x86_64'
+--extra-index-url 'https://pypi.nvidia.com'
+numpy < 1.24; platform_machine == 'x86_64'
+tensorrt == 8.5.3.*; platform_machine == 'x86_64'
+cuda-python == 11.8; platform_machine == 'x86_64'
 cython == 0.29.*; platform_machine == 'x86_64'
-nvidia-cuda-runtime-cu11 == 11.7.*; platform_machine == 'x86_64'
-nvidia-cublas-cu11 == 11.11.*; platform_machine == 'x86_64'
-nvidia-cudnn-cu11 == 8.7.*; platform_machine == 'x86_64'
-nvidia-cuda-nvrtc-cu11 == 11.7.*; platform_machine == 'x86_64'
\ No newline at end of file
+nvidia-cuda-runtime-cu12 == 12.1.*; platform_machine == 'x86_64'
+nvidia-cuda-runtime-cu11 == 11.8.*; platform_machine == 'x86_64'
+nvidia-cublas-cu11 == 11.11.3.6; platform_machine == 'x86_64'
+nvidia-cudnn-cu11 == 8.6.0.*; platform_machine == 'x86_64'
+onnx==1.14.0; platform_machine == 'x86_64'
+protobuf==3.20.3; platform_machine == 'x86_64'
\ No newline at end of file
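For reference, a minimal sketch of the host-side invocation that exercises the startup model generation added above, equivalent to the `docker-compose.yml` fragment in the docs. The image tag and the host config path are illustrative assumptions, not part of this diff; converted engines land in `/config/model_cache/tensorrt` and are only rebuilt when the corresponding `.trt` file is missing.

```bash
# Sketch only: run the TensorRT image variant with GPU access and request two
# model conversions at startup. The image tag and the host path
# /opt/frigate/config are assumptions -- adjust them for your deployment.
docker run -d \
  --name frigate \
  --gpus all \
  -e YOLO_MODELS="yolov4-608,yolov7x-640" \
  -e USE_FP16=false \
  -v /opt/frigate/config:/config \
  ghcr.io/blakeblackshear/frigate:stable-tensorrt
```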