Mirror of https://github.com/blakeblackshear/frigate.git
Remove TensorRT Detector from AMD64 Build (#19030)
* Don't support tensorrt detector for amd64 builds
* Add logs for directing users not to use tensorrt detector
* Rework docs
* Fix dockerfile index
* Don't undo jetson fix
This commit is contained in:
parent fc608b31d7
commit d69916694b
@@ -6,24 +6,29 @@ ARG DEBIAN_FRONTEND=noninteractive
# Globally set pip break-system-packages option to avoid having to specify it every time
ARG PIP_BREAK_SYSTEM_PACKAGES=1

FROM tensorrt-base AS frigate-tensorrt
FROM wheels AS trt-wheels
ARG PIP_BREAK_SYSTEM_PACKAGES
ENV TRT_VER=8.6.1

# Install TensorRT wheels
COPY docker/tensorrt/requirements-amd64.txt /requirements-tensorrt.txt
RUN pip3 install -U -r /requirements-tensorrt.txt && ldconfig
COPY docker/main/requirements-wheels.txt /requirements-wheels.txt
RUN pip3 wheel --wheel-dir=/trt-wheels -c /requirements-wheels.txt -r /requirements-tensorrt.txt

FROM deps AS frigate-tensorrt
ARG PIP_BREAK_SYSTEM_PACKAGES

RUN --mount=type=bind,from=trt-wheels,source=/trt-wheels,target=/deps/trt-wheels \
    pip3 uninstall -y onnxruntime-openvino tensorflow-cpu \
    && pip3 install -U /deps/trt-wheels/*.whl

COPY --from=rootfs / /
COPY docker/tensorrt/detector/rootfs/etc/ld.so.conf.d /etc/ld.so.conf.d
RUN ldconfig

WORKDIR /opt/frigate/
COPY --from=rootfs / /

# Dev Container w/ TRT
FROM devcontainer AS devcontainer-trt

COPY --from=trt-deps /usr/local/lib/libyolo_layer.so /usr/local/lib/libyolo_layer.so
COPY --from=trt-deps /usr/local/src/tensorrt_demos /usr/local/src/tensorrt_demos
COPY --from=trt-deps /usr/local/cuda-12.1 /usr/local/cuda
COPY docker/tensorrt/detector/rootfs/ /
COPY --from=trt-deps /usr/local/lib/libyolo_layer.so /usr/local/lib/libyolo_layer.so
RUN --mount=type=bind,from=trt-wheels,source=/trt-wheels,target=/deps/trt-wheels \
    pip3 install -U /deps/trt-wheels/*.whl
@@ -1,9 +1,61 @@
# syntax=docker/dockerfile:1.4
# syntax=docker/dockerfile:1.6

# https://askubuntu.com/questions/972516/debian-frontend-environment-variable
ARG DEBIAN_FRONTEND=noninteractive

ARG BASE_IMAGE
ARG TRT_BASE=nvcr.io/nvidia/tensorrt:23.12-py3

# Build TensorRT-specific library
FROM ${TRT_BASE} AS trt-deps

ARG TARGETARCH
ARG COMPUTE_LEVEL

RUN apt-get update \
    && apt-get install -y git build-essential cuda-nvcc-* cuda-nvtx-* libnvinfer-dev libnvinfer-plugin-dev libnvparsers-dev libnvonnxparsers-dev \
    && rm -rf /var/lib/apt/lists/*
RUN --mount=type=bind,source=docker/tensorrt/detector/tensorrt_libyolo.sh,target=/tensorrt_libyolo.sh \
    /tensorrt_libyolo.sh

# COPY required individual CUDA deps
RUN mkdir -p /usr/local/cuda-deps
RUN if [ "$TARGETARCH" = "amd64" ]; then \
    cp /usr/local/cuda-12.3/targets/x86_64-linux/lib/libcurand.so.* /usr/local/cuda-deps/ && \
    cp /usr/local/cuda-12.3/targets/x86_64-linux/lib/libnvrtc.so.* /usr/local/cuda-deps/ && \
    cd /usr/local/cuda-deps/ && \
    for lib in libnvrtc.so.*; do \
        if [[ "$lib" =~ libnvrtc.so\.([0-9]+\.[0-9]+\.[0-9]+) ]]; then \
            version="${BASH_REMATCH[1]}"; \
            ln -sf "libnvrtc.so.$version" libnvrtc.so; \
        fi; \
    done && \
    for lib in libcurand.so.*; do \
        if [[ "$lib" =~ libcurand.so\.([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+) ]]; then \
            version="${BASH_REMATCH[1]}"; \
            ln -sf "libcurand.so.$version" libcurand.so; \
        fi; \
    done; \
    fi

# Frigate w/ TensorRT Support as separate image
FROM deps AS tensorrt-base

#Disable S6 Global timeout
ENV S6_CMD_WAIT_FOR_SERVICES_MAXTIME=0

# COPY TensorRT Model Generation Deps
COPY --from=trt-deps /usr/local/lib/libyolo_layer.so /usr/local/lib/libyolo_layer.so
COPY --from=trt-deps /usr/local/src/tensorrt_demos /usr/local/src/tensorrt_demos

# COPY Individual CUDA deps folder
COPY --from=trt-deps /usr/local/cuda-deps /usr/local/cuda

COPY docker/tensorrt/detector/rootfs/ /
ENV YOLO_MODELS=""

HEALTHCHECK --start-period=600s --start-interval=5s --interval=15s --timeout=5s --retries=3 \
    CMD curl --fail --silent --show-error http://127.0.0.1:5000/api/version || exit 1

FROM ${BASE_IMAGE} AS build-wheels
ARG DEBIAN_FRONTEND
@@ -100,4 +152,4 @@ WORKDIR /opt/frigate/
COPY --from=rootfs / /

# Fixes "Error importing detector runtime: /usr/lib/aarch64-linux-gnu/libstdc++.so.6: cannot allocate memory in static TLS block"
ENV LD_PRELOAD /usr/lib/aarch64-linux-gnu/libstdc++.so.6
ENV LD_PRELOAD /usr/lib/aarch64-linux-gnu/libstdc++.so.6
@@ -1,57 +0,0 @@
# syntax=docker/dockerfile:1.6

# https://askubuntu.com/questions/972516/debian-frontend-environment-variable
ARG DEBIAN_FRONTEND=noninteractive

ARG TRT_BASE=nvcr.io/nvidia/tensorrt:23.12-py3

# Build TensorRT-specific library
FROM ${TRT_BASE} AS trt-deps

ARG TARGETARCH
ARG COMPUTE_LEVEL

RUN apt-get update \
    && apt-get install -y git build-essential cuda-nvcc-* cuda-nvtx-* libnvinfer-dev libnvinfer-plugin-dev libnvparsers-dev libnvonnxparsers-dev \
    && rm -rf /var/lib/apt/lists/*
RUN --mount=type=bind,source=docker/tensorrt/detector/tensorrt_libyolo.sh,target=/tensorrt_libyolo.sh \
    /tensorrt_libyolo.sh

# COPY required individual CUDA deps
RUN mkdir -p /usr/local/cuda-deps
RUN if [ "$TARGETARCH" = "amd64" ]; then \
    cp /usr/local/cuda-12.3/targets/x86_64-linux/lib/libcurand.so.* /usr/local/cuda-deps/ && \
    cp /usr/local/cuda-12.3/targets/x86_64-linux/lib/libnvrtc.so.* /usr/local/cuda-deps/ && \
    cd /usr/local/cuda-deps/ && \
    for lib in libnvrtc.so.*; do \
        if [[ "$lib" =~ libnvrtc.so\.([0-9]+\.[0-9]+\.[0-9]+) ]]; then \
            version="${BASH_REMATCH[1]}"; \
            ln -sf "libnvrtc.so.$version" libnvrtc.so; \
        fi; \
    done && \
    for lib in libcurand.so.*; do \
        if [[ "$lib" =~ libcurand.so\.([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+) ]]; then \
            version="${BASH_REMATCH[1]}"; \
            ln -sf "libcurand.so.$version" libcurand.so; \
        fi; \
    done; \
    fi

# Frigate w/ TensorRT Support as separate image
FROM deps AS tensorrt-base

#Disable S6 Global timeout
ENV S6_CMD_WAIT_FOR_SERVICES_MAXTIME=0

# COPY TensorRT Model Generation Deps
COPY --from=trt-deps /usr/local/lib/libyolo_layer.so /usr/local/lib/libyolo_layer.so
COPY --from=trt-deps /usr/local/src/tensorrt_demos /usr/local/src/tensorrt_demos

# COPY Individual CUDA deps folder
COPY --from=trt-deps /usr/local/cuda-deps /usr/local/cuda

COPY docker/tensorrt/detector/rootfs/ /
ENV YOLO_MODELS=""

HEALTHCHECK --start-period=600s --start-interval=5s --interval=15s --timeout=5s --retries=3 \
    CMD curl --fail --silent --show-error http://127.0.0.1:5000/api/version || exit 1
@@ -1,7 +1,6 @@
/usr/local/lib
/usr/local/cuda
/usr/local/lib/python3.11/dist-packages/tensorrt
/usr/local/lib/python3.11/dist-packages/nvidia/cudnn/lib
/usr/local/lib/python3.11/dist-packages/nvidia/cuda_runtime/lib
/usr/local/lib/python3.11/dist-packages/nvidia/cublas/lib
/usr/local/lib/python3.11/dist-packages/nvidia/cufft/lib
/usr/local/lib/python3.11/dist-packages/nvidia/cufft/lib
/usr/local/lib/python3.11/dist-packages/nvidia/curand/lib/
/usr/local/lib/python3.11/dist-packages/nvidia/cuda_nvrtc/lib/
@@ -1,17 +1,18 @@
# NVidia TensorRT Support (amd64 only)
--extra-index-url 'https://pypi.nvidia.com'
numpy < 1.24; platform_machine == 'x86_64'
tensorrt == 8.6.1; platform_machine == 'x86_64'
tensorrt_bindings == 8.6.1; platform_machine == 'x86_64'
cuda-python == 11.8.*; platform_machine == 'x86_64'
cython == 3.0.*; platform_machine == 'x86_64'
nvidia-cuda-runtime-cu12 == 12.1.*; platform_machine == 'x86_64'
nvidia-cuda-runtime-cu11 == 11.8.*; platform_machine == 'x86_64'
nvidia-cublas-cu11 == 11.11.3.6; platform_machine == 'x86_64'
nvidia-cudnn-cu11 == 8.6.0.*; platform_machine == 'x86_64'
nvidia-cudnn-cu12 == 9.5.0.*; platform_machine == 'x86_64'
nvidia-cufft-cu11==10.*; platform_machine == 'x86_64'
nvidia-cufft-cu12==11.*; platform_machine == 'x86_64'
cython==3.0.*; platform_machine == 'x86_64'
nvidia_cuda_cupti_cu12==12.5.82; platform_machine == 'x86_64'
nvidia-cublas-cu12==12.5.3.*; platform_machine == 'x86_64'
nvidia-cudnn-cu12==9.3.0.*; platform_machine == 'x86_64'
nvidia-cufft-cu12==11.2.3.*; platform_machine == 'x86_64'
nvidia-curand-cu12==10.3.6.*; platform_machine == 'x86_64'
nvidia_cuda_nvcc_cu12==12.5.82; platform_machine == 'x86_64'
nvidia-cuda-nvrtc-cu12==12.5.82; platform_machine == 'x86_64'
nvidia_cuda_runtime_cu12==12.5.82; platform_machine == 'x86_64'
nvidia_cusolver_cu12==11.6.3.*; platform_machine == 'x86_64'
nvidia_cusparse_cu12==12.5.1.*; platform_machine == 'x86_64'
nvidia_nccl_cu12==2.23.4; platform_machine == 'x86_64'
nvidia_nvjitlink_cu12==12.5.82; platform_machine == 'x86_64'
onnx==1.16.*; platform_machine == 'x86_64'
onnxruntime-gpu==1.20.*; platform_machine == 'x86_64'
onnxruntime-gpu==1.22.*; platform_machine == 'x86_64'
protobuf==3.20.3; platform_machine == 'x86_64'
@ -79,21 +79,13 @@ target "trt-deps" {
|
||||
inherits = ["_build_args"]
|
||||
}
|
||||
|
||||
target "tensorrt-base" {
|
||||
dockerfile = "docker/tensorrt/Dockerfile.base"
|
||||
context = "."
|
||||
contexts = {
|
||||
deps = "target:deps",
|
||||
}
|
||||
inherits = ["_build_args"]
|
||||
}
|
||||
|
||||
target "tensorrt" {
|
||||
dockerfile = "docker/tensorrt/Dockerfile.${ARCH}"
|
||||
context = "."
|
||||
contexts = {
|
||||
wget = "target:wget",
|
||||
tensorrt-base = "target:tensorrt-base",
|
||||
wheels = "target:wheels",
|
||||
deps = "target:deps",
|
||||
rootfs = "target:rootfs"
|
||||
}
|
||||
target = "frigate-tensorrt"
|
||||
|
@@ -24,10 +24,13 @@ Frigate supports multiple different detectors that work on different types of ha
- [OpenVino](#openvino-detector): OpenVino can run on Intel Arc GPUs, Intel integrated GPUs, and Intel CPUs to provide efficient object detection.
- [ONNX](#onnx): OpenVINO will automatically be detected and used as a detector in the default Frigate image when a supported ONNX model is configured.

**Nvidia**
**Nvidia GPU**

- [TensortRT](#nvidia-tensorrt-detector): TensorRT can run on Nvidia GPUs and Jetson devices, using one of many default models.
- [ONNX](#onnx): TensorRT will automatically be detected and used as a detector in the `-tensorrt` or `-tensorrt-jp6` Frigate images when a supported ONNX model is configured.
- [ONNX](#onnx): TensorRT will automatically be detected and used as a detector in the `-tensorrt` Frigate image when a supported ONNX model is configured.

**Nvidia Jetson**
- [TensortRT](#nvidia-tensorrt-detector): TensorRT can run on Jetson devices, using one of many default models.
- [ONNX](#onnx): TensorRT will automatically be detected and used as a detector in the `-tensorrt-jp6` Frigate image when a supported ONNX model is configured.

**Rockchip**
@@ -399,112 +402,6 @@ model:

Note that the labelmap uses a subset of the complete COCO label set that has only 80 objects.

## NVidia TensorRT Detector

Nvidia GPUs may be used for object detection using the TensorRT libraries. Due to the size of the additional libraries, this detector is only provided in images with the `-tensorrt` tag suffix, e.g. `ghcr.io/blakeblackshear/frigate:stable-tensorrt`. This detector is designed to work with Yolo models for object detection.

### Minimum Hardware Support

The TensorRT detector uses the 12.x series of CUDA libraries which have minor version compatibility. The minimum driver version on the host system must be `>=545`. Also the GPU must support a Compute Capability of `5.0` or greater. This generally correlates to a Maxwell-era GPU or newer, check the NVIDIA GPU Compute Capability table linked below.

To use the TensorRT detector, make sure your host system has the [nvidia-container-runtime](https://docs.docker.com/config/containers/resource_constraints/#access-an-nvidia-gpu) installed to pass through the GPU to the container and the host system has a compatible driver installed for your GPU.

There are improved capabilities in newer GPU architectures that TensorRT can benefit from, such as INT8 operations and Tensor cores. The features compatible with your hardware will be optimized when the model is converted to a trt file. Currently the script provided for generating the model provides a switch to enable/disable FP16 operations. If you wish to use newer features such as INT8 optimization, more work is required.

#### Compatibility References:

[NVIDIA TensorRT Support Matrix](https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-841/support-matrix/index.html)

[NVIDIA CUDA Compatibility](https://docs.nvidia.com/deploy/cuda-compatibility/index.html)

[NVIDIA GPU Compute Capability](https://developer.nvidia.com/cuda-gpus)

### Generate Models

The model used for TensorRT must be preprocessed on the same hardware platform that they will run on. This means that each user must run additional setup to generate a model file for the TensorRT library. A script is included that will build several common models.

The Frigate image will generate model files during startup if the specified model is not found. Processed models are stored in the `/config/model_cache` folder. Typically the `/config` path is mapped to a directory on the host already and the `model_cache` does not need to be mapped separately unless the user wants to store it in a different location on the host.

By default, no models will be generated, but this can be overridden by specifying the `YOLO_MODELS` environment variable in Docker. One or more models may be listed in a comma-separated format, and each one will be generated. Models will only be generated if the corresponding `{model}.trt` file is not present in the `model_cache` folder, so you can force a model to be regenerated by deleting it from your Frigate data folder.

If you have a Jetson device with DLAs (Xavier or Orin), you can generate a model that will run on the DLA by appending `-dla` to your model name, e.g. specify `YOLO_MODELS=yolov7-320-dla`. The model will run on DLA0 (Frigate does not currently support DLA1). DLA-incompatible layers will fall back to running on the GPU.

If your GPU does not support FP16 operations, you can pass the environment variable `USE_FP16=False` to disable it.

Specific models can be selected by passing an environment variable to the `docker run` command or in your `docker-compose.yml` file. Use the form `-e YOLO_MODELS=yolov4-416,yolov4-tiny-416` to select one or more model names. The models available are shown below.

<details>
<summary>Available Models</summary>
```
yolov3-288
yolov3-416
yolov3-608
yolov3-spp-288
yolov3-spp-416
yolov3-spp-608
yolov3-tiny-288
yolov3-tiny-416
yolov4-288
yolov4-416
yolov4-608
yolov4-csp-256
yolov4-csp-512
yolov4-p5-448
yolov4-p5-896
yolov4-tiny-288
yolov4-tiny-416
yolov4x-mish-320
yolov4x-mish-640
yolov7-tiny-288
yolov7-tiny-416
yolov7-640
yolov7-416
yolov7-320
yolov7x-640
yolov7x-320
```
</details>

An example `docker-compose.yml` fragment that converts the `yolov4-608` and `yolov7x-640` models for a Pascal card would look something like this:

```yml
frigate:
  environment:
    - YOLO_MODELS=yolov7-320,yolov7x-640
    - USE_FP16=false
```

If you have multiple GPUs passed through to Frigate, you can specify which one to use for the model conversion. The conversion script will use the first visible GPU, however in systems with mixed GPU models you may not want to use the default index for object detection. Add the `TRT_MODEL_PREP_DEVICE` environment variable to select a specific GPU.

```yml
frigate:
  environment:
    - TRT_MODEL_PREP_DEVICE=0 # Optionally, select which GPU is used for model optimization
```

### Configuration Parameters

The TensorRT detector can be selected by specifying `tensorrt` as the model type. The GPU will need to be passed through to the docker container using the same methods described in the [Hardware Acceleration](hardware_acceleration_video.md#nvidia-gpus) section. If you pass through multiple GPUs, you can select which GPU is used for a detector with the `device` configuration parameter. The `device` parameter is an integer value of the GPU index, as shown by `nvidia-smi` within the container.

The TensorRT detector uses `.trt` model files that are located in `/config/model_cache/tensorrt` by default. These model path and dimensions used will depend on which model you have generated.

Use the config below to work with generated TRT models:

```yaml
detectors:
  tensorrt:
    type: tensorrt
    device: 0 #This is the default, select the first GPU

model:
  path: /config/model_cache/tensorrt/yolov7-320.trt
  labelmap_path: /labelmap/coco-80.txt
  input_tensor: nchw
  input_pixel_format: rgb
  width: 320
  height: 320
```

## AMD/ROCm GPU detector

### Setup
@@ -802,6 +699,88 @@ To verify that the integration is working correctly, start Frigate and observe t

# Community Supported Detectors

## NVidia TensorRT Detector

Nvidia Jetson devices may be used for object detection using the TensorRT libraries. Due to the size of the additional libraries, this detector is only provided in images with the `-tensorrt-jp6` tag suffix, e.g. `ghcr.io/blakeblackshear/frigate:stable-tensorrt-jp6`. This detector is designed to work with Yolo models for object detection.
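As a rough illustration of running this Jetson image, a `docker-compose.yml` fragment might look like the sketch below. This is not part of the diff; the `runtime: nvidia` setting assumes the NVIDIA container runtime that ships with JetPack.

```yaml
# Hypothetical sketch only: run the Jetson TensorRT image with the NVIDIA runtime.
# Assumes the NVIDIA container runtime from JetPack is configured on the host.
services:
  frigate:
    image: ghcr.io/blakeblackshear/frigate:stable-tensorrt-jp6
    runtime: nvidia
```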

### Generate Models

The model used for TensorRT must be preprocessed on the same hardware platform that they will run on. This means that each user must run additional setup to generate a model file for the TensorRT library. A script is included that will build several common models.

The Frigate image will generate model files during startup if the specified model is not found. Processed models are stored in the `/config/model_cache` folder. Typically the `/config` path is mapped to a directory on the host already and the `model_cache` does not need to be mapped separately unless the user wants to store it in a different location on the host.

By default, no models will be generated, but this can be overridden by specifying the `YOLO_MODELS` environment variable in Docker. One or more models may be listed in a comma-separated format, and each one will be generated. Models will only be generated if the corresponding `{model}.trt` file is not present in the `model_cache` folder, so you can force a model to be regenerated by deleting it from your Frigate data folder.

If you have a Jetson device with DLAs (Xavier or Orin), you can generate a model that will run on the DLA by appending `-dla` to your model name, e.g. specify `YOLO_MODELS=yolov7-320-dla`. The model will run on DLA0 (Frigate does not currently support DLA1). DLA-incompatible layers will fall back to running on the GPU.

If your GPU does not support FP16 operations, you can pass the environment variable `USE_FP16=False` to disable it.

Specific models can be selected by passing an environment variable to the `docker run` command or in your `docker-compose.yml` file. Use the form `-e YOLO_MODELS=yolov4-416,yolov4-tiny-416` to select one or more model names. The models available are shown below.

<details>
<summary>Available Models</summary>
```
yolov3-288
yolov3-416
yolov3-608
yolov3-spp-288
yolov3-spp-416
yolov3-spp-608
yolov3-tiny-288
yolov3-tiny-416
yolov4-288
yolov4-416
yolov4-608
yolov4-csp-256
yolov4-csp-512
yolov4-p5-448
yolov4-p5-896
yolov4-tiny-288
yolov4-tiny-416
yolov4x-mish-320
yolov4x-mish-640
yolov7-tiny-288
yolov7-tiny-416
yolov7-640
yolov7-416
yolov7-320
yolov7x-640
yolov7x-320
```
</details>

An example `docker-compose.yml` fragment that converts the `yolov4-608` and `yolov7x-640` models would look something like this:

```yml
frigate:
  environment:
    - YOLO_MODELS=yolov7-320,yolov7x-640
    - USE_FP16=false
```

### Configuration Parameters

The TensorRT detector can be selected by specifying `tensorrt` as the model type. The GPU will need to be passed through to the docker container using the same methods described in the [Hardware Acceleration](hardware_acceleration_video.md#nvidia-gpus) section. If you pass through multiple GPUs, you can select which GPU is used for a detector with the `device` configuration parameter. The `device` parameter is an integer value of the GPU index, as shown by `nvidia-smi` within the container.

The TensorRT detector uses `.trt` model files that are located in `/config/model_cache/tensorrt` by default. These model path and dimensions used will depend on which model you have generated.

Use the config below to work with generated TRT models:

```yaml
detectors:
  tensorrt:
    type: tensorrt
    device: 0 #This is the default, select the first GPU

model:
  path: /config/model_cache/tensorrt/yolov7-320.trt
  labelmap_path: /labelmap/coco-80.txt
  input_tensor: nchw
  input_pixel_format: rgb
  width: 320
  height: 320
```

## Rockchip platform

Hardware accelerated object detection is supported on the following SoCs:
@@ -136,7 +136,23 @@ Inference speeds vary greatly depending on the CPU or GPU used, some known examp

### TensorRT - Nvidia GPU

The TensortRT detector is able to run on x86 hosts that have an Nvidia GPU which supports the 12.x series of CUDA libraries. The minimum driver version on the host system must be `>=525.60.13`. Also the GPU must support a Compute Capability of `5.0` or greater. This generally correlates to a Maxwell-era GPU or newer, check the [TensorRT docs for more info](/configuration/object_detectors#nvidia-tensorrt-detector).
Frigate is able to utilize an Nvidia GPU which supports the 12.x series of CUDA libraries.

### Minimum Hardware Support

12.x series of CUDA libraries are used which have minor version compatibility. The minimum driver version on the host system must be `>=545`. Also the GPU must support a Compute Capability of `5.0` or greater. This generally correlates to a Maxwell-era GPU or newer, check the NVIDIA GPU Compute Capability table linked below.

Make sure your host system has the [nvidia-container-runtime](https://docs.docker.com/config/containers/resource_constraints/#access-an-nvidia-gpu) installed to pass through the GPU to the container and the host system has a compatible driver installed for your GPU.
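In practice the GPU passthrough mentioned above usually lives in the compose file. A minimal sketch of what that could look like, assuming Docker Compose v2 with the NVIDIA Container Toolkit installed (none of these values are taken from this diff):

```yaml
# Hypothetical GPU passthrough sketch; requires the NVIDIA Container Toolkit on the host.
services:
  frigate:
    image: ghcr.io/blakeblackshear/frigate:stable-tensorrt
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1            # expose one GPU to the container
              capabilities: [gpu]
```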

There are improved capabilities in newer GPU architectures that TensorRT can benefit from, such as INT8 operations and Tensor cores. The features compatible with your hardware will be optimized when the model is converted to a trt file. Currently the script provided for generating the model provides a switch to enable/disable FP16 operations. If you wish to use newer features such as INT8 optimization, more work is required.

#### Compatibility References:

[NVIDIA TensorRT Support Matrix](https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-841/support-matrix/index.html)

[NVIDIA CUDA Compatibility](https://docs.nvidia.com/deploy/cuda-compatibility/index.html)

[NVIDIA GPU Compute Capability](https://developer.nvidia.com/cuda-gpus)

Inference speeds will vary greatly depending on the GPU and the model used.
`tiny` variants are faster than the equivalent non-tiny model, some known examples are below:
@@ -1,5 +1,6 @@
import ctypes
import logging
import platform

import numpy as np

@@ -42,6 +43,13 @@ if TRT_SUPPORT:
                return logging.CRITICAL
            else:
                return logging.DEBUG
elif platform.machine() == "x86_64":
    logger.error(
        "TensorRT detector is no longer supported on amd64 system. Please use ONNX detector instead, see https://docs.frigate.video/configuration/object_detectors#onnx for more information."
    )
    raise ImportError(
        "TensorRT detector is no longer supported on amd64 system. Please use ONNX detector instead, see https://docs.frigate.video/configuration/object_detectors#onnx for more information."
    )


class TensorRTDetectorConfig(BaseDetectorConfig):
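The new log message and `ImportError` above point amd64 users at the ONNX detector. A rough sketch of what that migration could look like in the Frigate config is below; the model path, dimensions, and other values are placeholders for illustration, not values taken from this commit:

```yaml
# Hypothetical migration sketch: replace a removed amd64 `tensorrt` detector with ONNX.
detectors:
  onnx:
    type: onnx

model:
  # Placeholder path; point this at your own ONNX model in /config/model_cache.
  # Depending on the model, fields such as model_type or labelmap_path may also be needed.
  path: /config/model_cache/yolov7-320.onnx
  input_tensor: nchw
  input_pixel_format: rgb
  width: 320
  height: 320
```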