Upgrade TensorRT to 8.5.3 (#7006)

* Update to latest tensorrt (8.6.1) release

* Build trt libyolo_layer.so in container

* Update tensorrt_models script to convert models from the frigate container

* Fix typo in model script

* Fix paths to yolo lib and models folder

* Add S6 scripts to test and convert specified TensortRT models at startup.

Rearrange tensorrt files into a docker support folder.

* Update TensorRT documentation to reflect the new model conversion process and minimum HW support.

* Fix model_cache path to live in config directory

* Move tensorrt s6 files to the correct directory

* Fix issues in model generation script

* Disable global timeout for s6 services

* Add version folder to tensorrt model_cache path

* Include TensorRT version 8.5.3

* Add numpy requirement prior to removal of np.bool

* This TRT version uses a mixture of cuda dependencies

* Redirect stdout from noisy model conversion
This commit is contained in:
Nate Meyer 2023-07-06 15:20:33 -04:00 committed by GitHub
parent 30dfdf47d4
commit dd02958f7c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 125 additions and 62 deletions

View File

@ -262,15 +262,35 @@ FROM deps AS frigate
WORKDIR /opt/frigate/ WORKDIR /opt/frigate/
COPY --from=rootfs / / COPY --from=rootfs / /
# Build TensorRT-specific library
FROM nvcr.io/nvidia/tensorrt:23.03-py3 AS trt-deps
RUN --mount=type=bind,source=docker/support/tensorrt_detector/tensorrt_libyolo.sh,target=/tensorrt_libyolo.sh \
/tensorrt_libyolo.sh
# Frigate w/ TensorRT Support as separate image # Frigate w/ TensorRT Support as separate image
FROM frigate AS frigate-tensorrt FROM frigate AS frigate-tensorrt
#Disable S6 Global timeout
ENV S6_CMD_WAIT_FOR_SERVICES_MAXTIME=0
ENV TRT_VER=8.5.3
ENV YOLO_MODELS="yolov7-tiny-416"
COPY --from=trt-deps /usr/local/lib/libyolo_layer.so /usr/local/lib/libyolo_layer.so
COPY --from=trt-deps /usr/local/src/tensorrt_demos /usr/local/src/tensorrt_demos
COPY docker/support/tensorrt_detector/rootfs/ /
RUN --mount=type=bind,from=trt-wheels,source=/trt-wheels,target=/deps/trt-wheels \ RUN --mount=type=bind,from=trt-wheels,source=/trt-wheels,target=/deps/trt-wheels \
pip3 install -U /deps/trt-wheels/*.whl && \ pip3 install -U /deps/trt-wheels/*.whl && \
ln -s libnvrtc.so.11.2 /usr/local/lib/python3.9/dist-packages/nvidia/cuda_nvrtc/lib/libnvrtc.so && \
ldconfig ldconfig
# Dev Container w/ TRT # Dev Container w/ TRT
FROM devcontainer AS devcontainer-trt FROM devcontainer AS devcontainer-trt
COPY --from=trt-deps /usr/local/lib/libyolo_layer.so /usr/local/lib/libyolo_layer.so
COPY --from=trt-deps /usr/local/src/tensorrt_demos /usr/local/src/tensorrt_demos
COPY docker/support/tensorrt_detector/rootfs/ /
COPY --from=trt-deps /usr/local/lib/libyolo_layer.so /usr/local/lib/libyolo_layer.so
RUN --mount=type=bind,from=trt-wheels,source=/trt-wheels,target=/deps/trt-wheels \ RUN --mount=type=bind,from=trt-wheels,source=/trt-wheels,target=/deps/trt-wheels \
pip3 install -U /deps/trt-wheels/*.whl pip3 install -U /deps/trt-wheels/*.whl

View File

@ -68,7 +68,7 @@ if [[ "${TARGETARCH}" == "arm64" ]]; then
libva-drm2 mesa-va-drivers libva-drm2 mesa-va-drivers
fi fi
apt-get purge gnupg apt-transport-https wget xz-utils -y apt-get purge gnupg apt-transport-https xz-utils -y
apt-get clean autoclean -y apt-get clean autoclean -y
apt-get autoremove --purge -y apt-get autoremove --purge -y
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*

View File

@ -1,3 +1,4 @@
/usr/local/lib
/usr/local/lib/python3.9/dist-packages/nvidia/cudnn/lib /usr/local/lib/python3.9/dist-packages/nvidia/cudnn/lib
/usr/local/lib/python3.9/dist-packages/nvidia/cuda_runtime/lib /usr/local/lib/python3.9/dist-packages/nvidia/cuda_runtime/lib
/usr/local/lib/python3.9/dist-packages/nvidia/cublas/lib /usr/local/lib/python3.9/dist-packages/nvidia/cublas/lib

View File

@ -0,0 +1,53 @@
#!/command/with-contenv bash
# shellcheck shell=bash
# Generate models for the TensorRT detector
set -o errexit -o nounset -o pipefail
MODEL_CACHE_DIR=${MODEL_CACHE_DIR:-"/config/model_cache/tensorrt"}
OUTPUT_FOLDER="${MODEL_CACHE_DIR}/${TRT_VER}"
# Create output folder
mkdir -p ${OUTPUT_FOLDER}
FIRST_MODEL=true
MODEL_CONVERT=""
for model in ${YOLO_MODELS//,/ }
do
# Remove old link in case path/version changed
rm -f ${MODEL_CACHE_DIR}/${model}.trt
if [[ ! -f ${OUTPUT_FOLDER}/${model}.trt ]]; then
if [[ ${FIRST_MODEL} = true ]]; then
MODEL_CONVERT="${model}"
FIRST_MODEL=false;
else
MODEL_CONVERT+=",${model}";
fi
else
ln -s ${OUTPUT_FOLDER}/${model}.trt ${MODEL_CACHE_DIR}/${model}.trt
fi
done
if [[ -z ${MODEL_CONVERT} ]]; then
echo "No models to convert."
exit 0
fi
echo "Generating the following TRT Models: ${MODEL_CONVERT}"
# Build trt engine
cd /usr/local/src/tensorrt_demos/yolo
# Download yolo weights
./download_yolo.sh $MODEL_CONVERT > /dev/null
for model in ${MODEL_CONVERT//,/ }
do
echo "Converting ${model} model"
python3 yolo_to_onnx.py -m ${model} > /dev/null
python3 onnx_to_tensorrt.py -m ${model} > /dev/null
cp ${model}.trt ${OUTPUT_FOLDER}/${model}.trt
ln -s ${OUTPUT_FOLDER}/${model}.trt ${MODEL_CACHE_DIR}/${model}.trt
done

View File

@ -0,0 +1 @@
/etc/s6-overlay/s6-rc.d/trt-model-prepare/run

View File

@ -0,0 +1,18 @@
#!/bin/bash
set -euxo pipefail
SCRIPT_DIR="/usr/local/src/tensorrt_demos"
# Clone tensorrt_demos repo
git clone --depth 1 https://github.com/NateMeyer/tensorrt_demos.git -b conditional_download
# Build libyolo
cd ./tensorrt_demos/plugins && make all
cp libyolo_layer.so /usr/local/lib/libyolo_layer.so
# Store yolo scripts for later conversion
cd ../
mkdir -p ${SCRIPT_DIR}/plugins
cp plugins/libyolo_layer.so ${SCRIPT_DIR}/plugins/libyolo_layer.so
cp -a yolo ${SCRIPT_DIR}/

View File

@ -1,34 +0,0 @@
#!/bin/bash
set -euxo pipefail
CUDA_HOME=/usr/local/cuda
LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64
OUTPUT_FOLDER=/tensorrt_models
echo "Generating the following TRT Models: ${YOLO_MODELS:="yolov4-tiny-288,yolov4-tiny-416,yolov7-tiny-416"}"
# Create output folder
mkdir -p ${OUTPUT_FOLDER}
# Install packages
pip install --upgrade pip && pip install onnx==1.9.0 protobuf==3.20.3
# Clone tensorrt_demos repo
git clone --depth 1 https://github.com/yeahme49/tensorrt_demos.git /tensorrt_demos
# Build libyolo
cd /tensorrt_demos/plugins && make all
cp libyolo_layer.so ${OUTPUT_FOLDER}/libyolo_layer.so
# Download yolo weights
cd /tensorrt_demos/yolo && ./download_yolo.sh
# Build trt engine
cd /tensorrt_demos/yolo
for model in ${YOLO_MODELS//,/ }
do
python3 yolo_to_onnx.py -m ${model}
python3 onnx_to_tensorrt.py -m ${model}
cp /tensorrt_demos/yolo/${model}.trt ${OUTPUT_FOLDER}/${model}.trt;
done

View File

@ -174,9 +174,7 @@ NVidia GPUs may be used for object detection using the TensorRT libraries. Due t
### Minimum Hardware Support ### Minimum Hardware Support
The TensorRT detector uses the 11.x series of CUDA libraries which have minor version compatibility. The minimum driver version on the host system must be `>=450.80.02`. Also the GPU must support a Compute Capability of `5.0` or greater. This generally correlates to a Maxwell-era GPU or newer, check the NVIDIA GPU Compute Capability table linked below. The TensorRT detector uses the 12.x series of CUDA libraries which have minor version compatibility. The minimum driver version on the host system must be `>=525.60.13`. Also the GPU must support a Compute Capability of `5.0` or greater. This generally correlates to a Maxwell-era GPU or newer, check the NVIDIA GPU Compute Capability table linked below.
> **TODO:** NVidia claims support on compute 3.5 and 3.7, but marks it as deprecated. This would have some, but not all, Kepler GPUs as possibly working. This needs testing before making any claims of support.
To use the TensorRT detector, make sure your host system has the [nvidia-container-runtime](https://docs.docker.com/config/containers/resource_constraints/#access-an-nvidia-gpu) installed to pass through the GPU to the container and the host system has a compatible driver installed for your GPU. To use the TensorRT detector, make sure your host system has the [nvidia-container-runtime](https://docs.docker.com/config/containers/resource_constraints/#access-an-nvidia-gpu) installed to pass through the GPU to the container and the host system has a compatible driver installed for your GPU.
@ -192,22 +190,15 @@ There are improved capabilities in newer GPU architectures that TensorRT can ben
### Generate Models ### Generate Models
The model used for TensorRT must be preprocessed on the same hardware platform that they will run on. This means that each user must run additional setup to generate a model file for the TensorRT library. A script is provided that will build several common models. The model used for TensorRT must be preprocessed on the same hardware platform that they will run on. This means that each user must run additional setup to generate a model file for the TensorRT library. A script is included that will build several common models.
To generate model files, create a new folder to save the models, download the script, and launch a docker container that will run the script. The Frigate image will generate model files during startup if the specified model is not found. Processed models are stored in the `/config/model_cache` folder. Typically the `/config` path is mapped to a directory on the host already and the `model_cache` does not need to be mapped separately unless the user wants to store it in a different location on the host.
```bash To by default, the `yolov7-tiny-416` model will be generated, but this can be overridden by specifying the `YOLO_MODELS` environment variable in Docker. One or more models may be listed in a comma-separated format, and each one will be generated. To select no model generation, set the variable to an empty string, `YOLO_MODELS=""`. Models will only be generated if the corresponding `{model}.trt` file is not present in the `model_cache` folder, so you can force a model to be regenerated by deleting it from your Frigate data folder.
mkdir trt-models
wget https://github.com/blakeblackshear/frigate/raw/master/docker/tensorrt_models.sh
chmod +x tensorrt_models.sh
docker run --gpus=all --rm -it -v `pwd`/trt-models:/tensorrt_models -v `pwd`/tensorrt_models.sh:/tensorrt_models.sh nvcr.io/nvidia/tensorrt:22.07-py3 /tensorrt_models.sh
```
The `trt-models` folder can then be mapped into your Frigate container as `trt-models` and the models referenced from the config. If your GPU does not support FP16 operations, you can pass the environment variable `USE_FP16=False` to disable it.
If your GPU does not support FP16 operations, you can pass the environment variable `-e USE_FP16=False` to the `docker run` command to disable it. Specific models can be selected by passing an environment variable to the `docker run` command or in your `docker-compose.yml` file. Use the form `-e YOLO_MODELS=yolov4-416,yolov4-tiny-416` to select one or more model names. The models available are shown below.
Specific models can be selected by passing an environment variable to the `docker run` command. Use the form `-e YOLO_MODELS=yolov4-416,yolov4-tiny-416` to select one or more model names. The models available are shown below.
``` ```
yolov3-288 yolov3-288
@ -237,11 +228,20 @@ yolov7x-640
yolov7x-320 yolov7x-320
``` ```
An example `docker-compose.yml` fragment that converts the `yolov4-608` and `yolov7x-640` models for a Pascal card would look something like this:
```yml
frigate:
environment:
- YOLO_MODELS="yolov4-608,yolov7x-640"
- USE_FP16=false
```
### Configuration Parameters ### Configuration Parameters
The TensorRT detector can be selected by specifying `tensorrt` as the model type. The GPU will need to be passed through to the docker container using the same methods described in the [Hardware Acceleration](hardware_acceleration.md#nvidia-gpu) section. If you pass through multiple GPUs, you can select which GPU is used for a detector with the `device` configuration parameter. The `device` parameter is an integer value of the GPU index, as shown by `nvidia-smi` within the container. The TensorRT detector can be selected by specifying `tensorrt` as the model type. The GPU will need to be passed through to the docker container using the same methods described in the [Hardware Acceleration](hardware_acceleration.md#nvidia-gpu) section. If you pass through multiple GPUs, you can select which GPU is used for a detector with the `device` configuration parameter. The `device` parameter is an integer value of the GPU index, as shown by `nvidia-smi` within the container.
The TensorRT detector uses `.trt` model files that are located in `/trt-models/` by default. These model file path and dimensions used will depend on which model you have generated. The TensorRT detector uses `.trt` model files that are located in `/config/model_cache/tensorrt` by default. These model path and dimensions used will depend on which model you have generated.
```yaml ```yaml
detectors: detectors:
@ -250,7 +250,7 @@ detectors:
device: 0 #This is the default, select the first GPU device: 0 #This is the default, select the first GPU
model: model:
path: /trt-models/yolov7-tiny-416.trt path: /config/model_cache/tensorrt/yolov7-tiny-416.trt
input_tensor: nchw input_tensor: nchw
input_pixel_format: rgb input_pixel_format: rgb
width: 416 width: 416

View File

@ -72,7 +72,7 @@ Inference speeds vary greatly depending on the CPU, GPU, or VPU used, some known
### TensorRT ### TensorRT
The TensortRT detector is able to run on x86 hosts that have an Nvidia GPU which supports the 11.x series of CUDA libraries. The minimum driver version on the host system must be `>=450.80.02`. Also the GPU must support a Compute Capability of `5.0` or greater. This generally correlates to a Maxwell-era GPU or newer, check the [TensorRT docs for more info](/configuration/object_detectors#nvidia-tensorrt-detector). The TensortRT detector is able to run on x86 hosts that have an Nvidia GPU which supports the 12.x series of CUDA libraries. The minimum driver version on the host system must be `>=525.60.13`. Also the GPU must support a Compute Capability of `5.0` or greater. This generally correlates to a Maxwell-era GPU or newer, check the [TensorRT docs for more info](/configuration/object_detectors#nvidia-tensorrt-detector).
Inference speeds will vary greatly depending on the GPU and the model used. Inference speeds will vary greatly depending on the GPU and the model used.
`tiny` variants are faster than the equivalent non-tiny model, some known examples are below: `tiny` variants are faster than the equivalent non-tiny model, some known examples are below:

View File

@ -78,7 +78,7 @@ class TensorRtDetector(DetectionApi):
try: try:
trt.init_libnvinfer_plugins(self.trt_logger, "") trt.init_libnvinfer_plugins(self.trt_logger, "")
ctypes.cdll.LoadLibrary("/trt-models/libyolo_layer.so") ctypes.cdll.LoadLibrary("/usr/local/lib/libyolo_layer.so")
except OSError as e: except OSError as e:
logger.error( logger.error(
"ERROR: failed to load libraries. %s", "ERROR: failed to load libraries. %s",

View File

@ -1,9 +1,12 @@
# NVidia TensorRT Support (amd64 only) # NVidia TensorRT Support (amd64 only)
nvidia-pyindex; platform_machine == 'x86_64' --extra-index-url 'https://pypi.nvidia.com'
nvidia-tensorrt == 8.4.1.5; platform_machine == 'x86_64' numpy < 1.24; platform_machine == 'x86_64'
cuda-python == 11.7; platform_machine == 'x86_64' tensorrt == 8.5.3.*; platform_machine == 'x86_64'
cuda-python == 11.8; platform_machine == 'x86_64'
cython == 0.29.*; platform_machine == 'x86_64' cython == 0.29.*; platform_machine == 'x86_64'
nvidia-cuda-runtime-cu11 == 11.7.*; platform_machine == 'x86_64' nvidia-cuda-runtime-cu12 == 12.1.*; platform_machine == 'x86_64'
nvidia-cublas-cu11 == 11.11.*; platform_machine == 'x86_64' nvidia-cuda-runtime-cu11 == 11.8.*; platform_machine == 'x86_64'
nvidia-cudnn-cu11 == 8.7.*; platform_machine == 'x86_64' nvidia-cublas-cu11 == 11.11.3.6; platform_machine == 'x86_64'
nvidia-cuda-nvrtc-cu11 == 11.7.*; platform_machine == 'x86_64' nvidia-cudnn-cu11 == 8.6.0.*; platform_machine == 'x86_64'
onnx==1.14.0; platform_machine == 'x86_64'
protobuf==3.20.3; platform_machine == 'x86_64'