From a96a951e2364be756b8911b850c361c71c390699 Mon Sep 17 00:00:00 2001 From: Andrew Reiter Date: Wed, 26 Jul 2023 06:50:41 -0400 Subject: [PATCH] Nvidia Jetson ffmpeg + TensorRT support (#6458) * Non-Jetson changes Required for later commits: - Allow base image to be overridden (and don't assume its WORKDIR) - Ensure python3.9 - Map hwaccel decode presets as strings instead of lists Not required: - Fix existing documentation - Simplify hwaccel scale logic * Prepare for multi-arch tensorrt build * Add tensorrt images for Jetson boards * Add Jetson ffmpeg hwaccel * Update docs * Add CODEOWNERS * CI * Change default model from yolov7-tiny-416 to yolov7-320 In my experience the tiny models perform markedly worse without being much faster * fixup! Update docs --- .github/workflows/ci.yml | 30 ++++- CODEOWNERS | 2 + docker-compose.yml | 4 +- docker/main/Dockerfile | 19 +++- docker/main/install_deps.sh | 10 +- .../tensorrt/{Dockerfile => Dockerfile.amd64} | 21 +--- docker/tensorrt/Dockerfile.arm64 | 79 +++++++++++++ docker/tensorrt/Dockerfile.base | 26 +++++ docker/tensorrt/build_jetson_ffmpeg.sh | 59 ++++++++++ .../detector/build_python_tensorrt.sh | 28 +++++ .../s6-overlay/s6-rc.d/trt-model-prepare/run | 52 +++++++-- docker/tensorrt/detector/tensorrt_libyolo.sh | 5 +- ...equirements.txt => requirements-amd64.txt} | 0 docker/tensorrt/requirements-arm64.txt | 1 + docker/tensorrt/requirements-models-arm64.txt | 3 + docker/tensorrt/trt.hcl | 70 ++++++++++-- docker/tensorrt/trt.mk | 22 +++- docs/docs/configuration/ffmpeg_presets.md | 22 ++-- .../configuration/hardware_acceleration.md | 74 ++++++++++++ docs/docs/configuration/index.md | 4 +- docs/docs/configuration/object_detectors.md | 10 +- docs/docs/development/contributing.md | 8 +- docs/docs/frigate/hardware.md | 10 +- docs/docs/frigate/installation.md | 3 + frigate/config.py | 14 ++- frigate/ffmpeg_presets.py | 105 +++++++----------- frigate/stats.py | 10 ++ frigate/util/services.py | 15 +++ 28 files changed, 567 insertions(+), 139 deletions(-) create mode 100644 CODEOWNERS rename docker/tensorrt/{Dockerfile => Dockerfile.amd64} (62%) create mode 100644 docker/tensorrt/Dockerfile.arm64 create mode 100644 docker/tensorrt/Dockerfile.base create mode 100755 docker/tensorrt/build_jetson_ffmpeg.sh create mode 100755 docker/tensorrt/detector/build_python_tensorrt.sh rename docker/tensorrt/{requirements.txt => requirements-amd64.txt} (100%) create mode 100644 docker/tensorrt/requirements-arm64.txt create mode 100644 docker/tensorrt/requirements-models-arm64.txt diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 66cbea55c..b35f8fca6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -79,7 +79,7 @@ jobs: set: | rpi.tags=ghcr.io/${{ steps.lowercaseRepo.outputs.lowercase }}:${{ github.ref_name }}-${{ env.SHORT_SHA }}-rpi *.cache-from=type=gha - - name: Build and push TensorRT + - name: Build and push TensorRT (x86 GPU) uses: docker/bake-action@v3 with: push: true @@ -88,6 +88,34 @@ jobs: set: | tensorrt.tags=ghcr.io/${{ steps.lowercaseRepo.outputs.lowercase }}:${{ github.ref_name }}-${{ env.SHORT_SHA }}-tensorrt *.cache-from=type=gha + - name: Build and push TensorRT (Jetson, Jetpack 4) + env: + ARCH: arm64 + BASE_IMAGE: timongentzsch/l4t-ubuntu20-opencv:latest + SLIM_BASE: timongentzsch/l4t-ubuntu20-opencv:latest + TRT_BASE: timongentzsch/l4t-ubuntu20-opencv:latest + uses: docker/bake-action@v3 + with: + push: true + targets: tensorrt + files: docker/tensorrt/trt.hcl + set: | + tensorrt.tags=ghcr.io/${{ 
steps.lowercaseRepo.outputs.lowercase }}:${{ github.ref_name }}-${{ env.SHORT_SHA }}-tensorrt-jp4 + *.cache-from=type=gha + - name: Build and push TensorRT (Jetson, Jetpack 5) + env: + ARCH: arm64 + BASE_IMAGE: nvcr.io/nvidia/l4t-tensorrt:r8.5.2-runtime + SLIM_BASE: nvcr.io/nvidia/l4t-tensorrt:r8.5.2-runtime + TRT_BASE: nvcr.io/nvidia/l4t-tensorrt:r8.5.2-runtime + uses: docker/bake-action@v3 + with: + push: true + targets: tensorrt + files: docker/tensorrt/trt.hcl + set: | + tensorrt.tags=ghcr.io/${{ steps.lowercaseRepo.outputs.lowercase }}:${{ github.ref_name }}-${{ env.SHORT_SHA }}-tensorrt-jp5 + *.cache-from=type=gha - name: Assemble and push default build uses: int128/docker-manifest-create-action@v1 with: diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 000000000..2b99e8a17 --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1,2 @@ +# Community-supported boards +/docker/tensorrt/ @madsciencetist @NateMeyer diff --git a/docker-compose.yml b/docker-compose.yml index b16f652f0..be04ad0a3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -21,6 +21,8 @@ services: - driver: nvidia count: 1 capabilities: [gpu] + environment: + YOLO_MODELS: yolov7-320 devices: - /dev/bus/usb:/dev/bus/usb # - /dev/dri:/dev/dri # for intel hwaccel, needs to be updated for your hardware @@ -30,8 +32,6 @@ services: - /etc/localtime:/etc/localtime:ro - ./config:/config - ./debug:/media/frigate - # Create the trt-models folder using the documented method of generating TRT models - # - ./debug/trt-models:/trt-models - /dev/bus/usb:/dev/bus/usb mqtt: container_name: mqtt diff --git a/docker/main/Dockerfile b/docker/main/Dockerfile index 06bc71c06..43795d864 100644 --- a/docker/main/Dockerfile +++ b/docker/main/Dockerfile @@ -3,11 +3,14 @@ # https://askubuntu.com/questions/972516/debian-frontend-environment-variable ARG DEBIAN_FRONTEND=noninteractive -FROM debian:11 AS base +ARG BASE_IMAGE=debian:11 +ARG SLIM_BASE=debian:11-slim + +FROM ${BASE_IMAGE} AS base FROM --platform=linux/amd64 debian:11 AS base_amd64 -FROM debian:11-slim AS slim-base +FROM ${SLIM_BASE} AS slim-base FROM slim-base AS wget ARG DEBIAN_FRONTEND @@ -123,8 +126,8 @@ RUN apt-get -qq update \ && echo "deb http://deb.debian.org/debian bullseye main contrib non-free" | tee /etc/apt/sources.list.d/raspi.list \ && apt-get -qq update \ && apt-get -qq install -y \ - python3 \ - python3-dev \ + python3.9 \ + python3.9-dev \ wget \ # opencv dependencies build-essential cmake git pkg-config libgtk-3-dev \ @@ -137,14 +140,17 @@ RUN apt-get -qq update \ gcc gfortran libopenblas-dev liblapack-dev && \ rm -rf /var/lib/apt/lists/* +# Ensure python3 defaults to python3.9 +RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1 + RUN wget -q https://bootstrap.pypa.io/get-pip.py -O get-pip.py \ && python3 get-pip.py "pip" COPY docker/main/requirements.txt /requirements.txt -RUN pip3 install -r requirements.txt +RUN pip3 install -r /requirements.txt COPY docker/main/requirements-wheels.txt /requirements-wheels.txt -RUN pip3 wheel --wheel-dir=/wheels -r requirements-wheels.txt +RUN pip3 wheel --wheel-dir=/wheels -r /requirements-wheels.txt # Collect deps in a single layer @@ -176,6 +182,7 @@ RUN --mount=type=bind,source=docker/main/install_deps.sh,target=/deps/install_de /deps/install_deps.sh RUN --mount=type=bind,from=wheels,source=/wheels,target=/deps/wheels \ + python3 -m pip install --upgrade pip && \ pip3 install -U /deps/wheels/*.whl COPY --from=deps-rootfs / / diff --git a/docker/main/install_deps.sh 
b/docker/main/install_deps.sh index 9eb784ec8..61ece1b76 100755 --- a/docker/main/install_deps.sh +++ b/docker/main/install_deps.sh @@ -10,11 +10,15 @@ apt-get -qq install --no-install-recommends -y \ wget \ procps vainfo \ unzip locales tzdata libxml2 xz-utils \ + python3.9 \ python3-pip \ curl \ jq \ nethogs +# ensure python3 defaults to python3.9 +update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1 + mkdir -p -m 600 /root/.gnupg # add coral repo @@ -23,8 +27,10 @@ curl -fsSLo - https://packages.cloud.google.com/apt/doc/apt-key.gpg | \ echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | tee /etc/apt/sources.list.d/coral-edgetpu.list echo "libedgetpu1-max libedgetpu/accepted-eula select true" | debconf-set-selections -# enable non-free repo -sed -i -e's/ main/ main contrib non-free/g' /etc/apt/sources.list +# enable non-free repo in Debian +if grep -q "Debian" /etc/issue; then + sed -i -e's/ main/ main contrib non-free/g' /etc/apt/sources.list +fi # coral drivers apt-get -qq update diff --git a/docker/tensorrt/Dockerfile b/docker/tensorrt/Dockerfile.amd64 similarity index 62% rename from docker/tensorrt/Dockerfile rename to docker/tensorrt/Dockerfile.amd64 index 564cc3b5b..075726eda 100644 --- a/docker/tensorrt/Dockerfile +++ b/docker/tensorrt/Dockerfile.amd64 @@ -9,28 +9,11 @@ ARG DEBIAN_FRONTEND ARG TARGETARCH # Add TensorRT wheels to another folder -COPY docker/tensorrt/requirements.txt /requirements-tensorrt.txt +COPY docker/tensorrt/requirements-amd64.txt /requirements-tensorrt.txt RUN mkdir -p /trt-wheels && pip3 wheel --wheel-dir=/trt-wheels -r /requirements-tensorrt.txt -# Build TensorRT-specific library -FROM nvcr.io/nvidia/tensorrt:23.03-py3 AS trt-deps - -RUN --mount=type=bind,source=docker/tensorrt/detector/tensorrt_libyolo.sh,target=/tensorrt_libyolo.sh \ - /tensorrt_libyolo.sh - -# Frigate w/ TensorRT Support as separate image -FROM deps AS frigate-tensorrt - -#Disable S6 Global timeout -ENV S6_CMD_WAIT_FOR_SERVICES_MAXTIME=0 - +FROM tensorrt-base AS frigate-tensorrt ENV TRT_VER=8.5.3 -ENV YOLO_MODELS="yolov7-tiny-416" - -COPY --from=trt-deps /usr/local/lib/libyolo_layer.so /usr/local/lib/libyolo_layer.so -COPY --from=trt-deps /usr/local/src/tensorrt_demos /usr/local/src/tensorrt_demos -COPY docker/tensorrt/detector/rootfs/ / - RUN --mount=type=bind,from=trt-wheels,source=/trt-wheels,target=/deps/trt-wheels \ pip3 install -U /deps/trt-wheels/*.whl && \ ldconfig diff --git a/docker/tensorrt/Dockerfile.arm64 b/docker/tensorrt/Dockerfile.arm64 new file mode 100644 index 000000000..70184bf9b --- /dev/null +++ b/docker/tensorrt/Dockerfile.arm64 @@ -0,0 +1,79 @@ +# syntax=docker/dockerfile:1.4 + +# https://askubuntu.com/questions/972516/debian-frontend-environment-variable +ARG DEBIAN_FRONTEND=noninteractive + +ARG BASE_IMAGE +FROM ${BASE_IMAGE} AS build-wheels +ARG DEBIAN_FRONTEND + +# Use a separate container to build wheels to prevent build dependencies in final image +RUN apt-get -qq update \ + && apt-get -qq install -y --no-install-recommends \ + python3.9 python3.9-dev \ + wget build-essential cmake git \ + && rm -rf /var/lib/apt/lists/* + +# Ensure python3 defaults to python3.9 +RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1 + +RUN wget -q https://bootstrap.pypa.io/get-pip.py -O get-pip.py \ + && python3 get-pip.py "pip" + + +FROM build-wheels AS trt-wheels +ARG DEBIAN_FRONTEND +ARG TARGETARCH + +# python-tensorrt build deps are 3.4 GB! 
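+# They are only installed in this throwaway wheel-building stage, so they never end up in the final image.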
+RUN apt-get update \ + && apt-get install -y ccache cuda-cudart-dev-* cuda-nvcc-* libnvonnxparsers-dev libnvparsers-dev libnvinfer-plugin-dev \ + && ([ -e /usr/local/cuda ] || ln -s /usr/local/cuda-* /usr/local/cuda) \ + && rm -rf /var/lib/apt/lists/*; + +# Determine version of tensorrt already installed in base image, e.g. "Version: 8.4.1-1+cuda11.4" +RUN NVINFER_VER=$(dpkg -s libnvinfer8 | grep -Po "Version: \K.*") \ + && echo $NVINFER_VER | grep -Po "^\d+\.\d+\.\d+" > /etc/TENSORRT_VER + +RUN --mount=type=bind,source=docker/tensorrt/detector/build_python_tensorrt.sh,target=/deps/build_python_tensorrt.sh \ + --mount=type=cache,target=/root/.ccache \ + export PATH="/usr/lib/ccache:$PATH" CCACHE_DIR=/root/.ccache CCACHE_MAXSIZE=2G \ + && TENSORRT_VER=$(cat /etc/TENSORRT_VER) /deps/build_python_tensorrt.sh + +COPY docker/tensorrt/requirements-arm64.txt /requirements-tensorrt.txt +RUN pip3 wheel --wheel-dir=/trt-wheels -r /requirements-tensorrt.txt + +FROM build-wheels AS trt-model-wheels +ARG DEBIAN_FRONTEND + +RUN apt-get update \ + && apt-get install -y protobuf-compiler libprotobuf-dev \ + && rm -rf /var/lib/apt/lists/* +RUN --mount=type=bind,source=docker/tensorrt/requirements-models-arm64.txt,target=/requirements-tensorrt-models.txt \ + pip3 wheel --wheel-dir=/trt-model-wheels -r /requirements-tensorrt-models.txt + +FROM wget AS jetson-ffmpeg +ARG DEBIAN_FRONTEND +ENV CCACHE_DIR /root/.ccache +ENV CCACHE_MAXSIZE 2G +RUN --mount=type=bind,source=docker/tensorrt/build_jetson_ffmpeg.sh,target=/deps/build_jetson_ffmpeg.sh \ + --mount=type=cache,target=/root/.ccache \ + /deps/build_jetson_ffmpeg.sh + +# Frigate w/ TensorRT for NVIDIA Jetson platforms +FROM tensorrt-base AS frigate-tensorrt +RUN apt-get update \ + && apt-get install -y python-is-python3 libprotobuf17 \ + && rm -rf /var/lib/apt/lists/* + +RUN rm -rf /usr/lib/btbn-ffmpeg/ +COPY --from=jetson-ffmpeg /rootfs / + +COPY --from=trt-wheels /etc/TENSORRT_VER /etc/TENSORRT_VER +RUN --mount=type=bind,from=trt-wheels,source=/trt-wheels,target=/deps/trt-wheels \ + --mount=type=bind,from=trt-model-wheels,source=/trt-model-wheels,target=/deps/trt-model-wheels \ + pip3 install -U /deps/trt-wheels/*.whl /deps/trt-model-wheels/*.whl \ + && ldconfig + +WORKDIR /opt/frigate/ +COPY --from=rootfs / / diff --git a/docker/tensorrt/Dockerfile.base b/docker/tensorrt/Dockerfile.base new file mode 100644 index 000000000..331a328b7 --- /dev/null +++ b/docker/tensorrt/Dockerfile.base @@ -0,0 +1,26 @@ +# syntax=docker/dockerfile:1.4 + +# https://askubuntu.com/questions/972516/debian-frontend-environment-variable +ARG DEBIAN_FRONTEND=noninteractive + +ARG TRT_BASE=nvcr.io/nvidia/tensorrt:23.03-py3 + +# Build TensorRT-specific library +FROM ${TRT_BASE} AS trt-deps + +RUN apt-get update \ + && apt-get install -y git build-essential cuda-nvcc-* cuda-nvtx-* libnvinfer-dev libnvinfer-plugin-dev libnvparsers-dev libnvonnxparsers-dev \ + && rm -rf /var/lib/apt/lists/* +RUN --mount=type=bind,source=docker/tensorrt/detector/tensorrt_libyolo.sh,target=/tensorrt_libyolo.sh \ + /tensorrt_libyolo.sh + +# Frigate w/ TensorRT Support as separate image +FROM deps AS tensorrt-base + +#Disable S6 Global timeout +ENV S6_CMD_WAIT_FOR_SERVICES_MAXTIME=0 + +COPY --from=trt-deps /usr/local/lib/libyolo_layer.so /usr/local/lib/libyolo_layer.so +COPY --from=trt-deps /usr/local/src/tensorrt_demos /usr/local/src/tensorrt_demos +COPY docker/tensorrt/detector/rootfs/ / +ENV YOLO_MODELS="yolov7-320" diff --git a/docker/tensorrt/build_jetson_ffmpeg.sh 
b/docker/tensorrt/build_jetson_ffmpeg.sh new file mode 100755 index 000000000..8c532ebc3 --- /dev/null +++ b/docker/tensorrt/build_jetson_ffmpeg.sh @@ -0,0 +1,59 @@ +#!/bin/bash + +# For jetson platforms, build ffmpeg with custom patches. NVIDIA supplies a deb +# with accelerated decoding, but it doesn't have accelerated scaling or encoding + +set -euxo pipefail + +INSTALL_PREFIX=/rootfs/usr/local + +apt-get -qq update +apt-get -qq install -y --no-install-recommends build-essential ccache clang cmake pkg-config +apt-get -qq install -y --no-install-recommends libx264-dev libx265-dev + +pushd /tmp + +# Install libnvmpi to enable nvmpi decoders (h264_nvmpi, hevc_nvmpi) +if [ -e /usr/local/cuda-10.2 ]; then + # assume Jetpack 4.X + wget -q https://developer.nvidia.com/embedded/L4T/r32_Release_v5.0/T186/Jetson_Multimedia_API_R32.5.0_aarch64.tbz2 -O jetson_multimedia_api.tbz2 +else + # assume Jetpack 5.X + wget -q https://developer.nvidia.com/downloads/embedded/l4t/r35_release_v3.1/release/jetson_multimedia_api_r35.3.1_aarch64.tbz2 -O jetson_multimedia_api.tbz2 +fi +tar xaf jetson_multimedia_api.tbz2 -C / && rm jetson_multimedia_api.tbz2 + +wget -q https://github.com/madsciencetist/jetson-ffmpeg/archive/refs/heads/master.zip +unzip master.zip && rm master.zip && cd jetson-ffmpeg-master +LD_LIBRARY_PATH=$(pwd)/stubs:$LD_LIBRARY_PATH # tegra multimedia libs aren't available in image, so use stubs for ffmpeg build +mkdir build +cd build +cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$INSTALL_PREFIX +make -j$(nproc) +make install +cd ../../ + +# Install nv-codec-headers to enable ffnvcodec filters (scale_cuda) +wget -q https://github.com/FFmpeg/nv-codec-headers/archive/refs/heads/master.zip +unzip master.zip && rm master.zip && cd nv-codec-headers-master +make PREFIX=$INSTALL_PREFIX install +cd ../ && rm -rf nv-codec-headers-master + +# Build ffmpeg with nvmpi patch +wget -q https://ffmpeg.org/releases/ffmpeg-6.0.tar.xz +tar xaf ffmpeg-*.tar.xz && rm ffmpeg-*.tar.xz && cd ffmpeg-* +patch -p1 < ../jetson-ffmpeg-master/ffmpeg_patches/ffmpeg6.0_nvmpi.patch +export PKG_CONFIG_PATH=$INSTALL_PREFIX/lib/pkgconfig +# enable Jetson codecs but disable dGPU codecs +./configure --cc='ccache gcc' --cxx='ccache g++' \ + --enable-shared --disable-static --prefix=$INSTALL_PREFIX \ + --enable-gpl --enable-libx264 --enable-libx265 \ + --enable-nvmpi --enable-ffnvcodec --enable-cuda-llvm \ + --disable-cuvid --disable-nvenc --disable-nvdec \ + || { cat ffbuild/config.log && false; } +make -j$(nproc) +make install +cd ../ + +rm -rf /var/lib/apt/lists/* +popd diff --git a/docker/tensorrt/detector/build_python_tensorrt.sh b/docker/tensorrt/detector/build_python_tensorrt.sh new file mode 100755 index 000000000..21b6ae268 --- /dev/null +++ b/docker/tensorrt/detector/build_python_tensorrt.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +set -euxo pipefail + +mkdir -p /trt-wheels + +if [[ "${TARGETARCH}" == "arm64" ]]; then + + # NVIDIA supplies python-tensorrt for python3.8, but frigate uses python3.9, + # so we must build python-tensorrt ourselves. 
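+    # TENSORRT_VER is passed in by Dockerfile.arm64, which derives it from the installed
+    # libnvinfer8 package version, so the TensorRT branch cloned below matches the runtime
+    # libraries already present in the base image.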
+ + # Get python-tensorrt source + mkdir /workspace + cd /workspace + git clone -b ${TENSORRT_VER} https://github.com/NVIDIA/TensorRT.git --depth=1 + + # Collect dependencies + EXT_PATH=/workspace/external && mkdir -p $EXT_PATH + pip3 install pybind11 && ln -s /usr/local/lib/python3.9/dist-packages/pybind11 $EXT_PATH/pybind11 + ln -s /usr/include/python3.9 $EXT_PATH/python3.9 + ln -s /usr/include/aarch64-linux-gnu/NvOnnxParser.h /workspace/TensorRT/parsers/onnx/ + + # Build wheel + cd /workspace/TensorRT/python + EXT_PATH=$EXT_PATH PYTHON_MAJOR_VERSION=3 PYTHON_MINOR_VERSION=9 TARGET_ARCHITECTURE=aarch64 /bin/bash ./build.sh + mv build/dist/*.whl /trt-wheels/ + +fi diff --git a/docker/tensorrt/detector/rootfs/etc/s6-overlay/s6-rc.d/trt-model-prepare/run b/docker/tensorrt/detector/rootfs/etc/s6-overlay/s6-rc.d/trt-model-prepare/run index 5f0e43553..93b132a38 100755 --- a/docker/tensorrt/detector/rootfs/etc/s6-overlay/s6-rc.d/trt-model-prepare/run +++ b/docker/tensorrt/detector/rootfs/etc/s6-overlay/s6-rc.d/trt-model-prepare/run @@ -2,27 +2,35 @@ # shellcheck shell=bash # Generate models for the TensorRT detector +# One or more comma-separated models may be specified via the YOLO_MODELS env. +# Append "-dla" to the model name to generate a DLA model with GPU fallback; +# otherwise a GPU-only model will be generated. + set -o errexit -o nounset -o pipefail MODEL_CACHE_DIR=${MODEL_CACHE_DIR:-"/config/model_cache/tensorrt"} +TRT_VER=${TRT_VER:-$(cat /etc/TENSORRT_VER)} OUTPUT_FOLDER="${MODEL_CACHE_DIR}/${TRT_VER}" # Create output folder mkdir -p ${OUTPUT_FOLDER} FIRST_MODEL=true +MODEL_DOWNLOAD="" MODEL_CONVERT="" for model in ${YOLO_MODELS//,/ } do # Remove old link in case path/version changed rm -f ${MODEL_CACHE_DIR}/${model}.trt - + if [[ ! -f ${OUTPUT_FOLDER}/${model}.trt ]]; then if [[ ${FIRST_MODEL} = true ]]; then + MODEL_DOWNLOAD="${model%-dla}"; MODEL_CONVERT="${model}" FIRST_MODEL=false; else + MODEL_DOWNLOAD+=",${model%-dla}"; MODEL_CONVERT+=",${model}"; fi else @@ -35,19 +43,49 @@ if [[ -z ${MODEL_CONVERT} ]]; then exit 0 fi +# On Jetpack 4.6, the nvidia container runtime will mount several host nvidia libraries into the +# container which should not be present in the image - if they are, TRT model generation will +# fail or produce invalid models. Thus we must request the user to install them on the host in +# order to run libyolo here. +# On Jetpack 5.0, these libraries are not mounted by the runtime and are supplied by the image. +if [[ "$(arch)" == "aarch64" ]]; then + if [[ ! -e /usr/lib/aarch64-linux-gnu/tegra ]]; then + echo "ERROR: Container must be launched with nvidia runtime" + exit 1 + elif [[ ! -e /usr/lib/aarch64-linux-gnu/libnvinfer.so.8 || + ! -e /usr/lib/aarch64-linux-gnu/libnvinfer_plugin.so.8 || + ! -e /usr/lib/aarch64-linux-gnu/libnvparsers.so.8 || + ! 
-e /usr/lib/aarch64-linux-gnu/libnvonnxparser.so.8 ]]; then + echo "ERROR: Please run the following on the HOST:" + echo " sudo apt install libnvinfer8 libnvinfer-plugin8 libnvparsers8 libnvonnxparsers8 nvidia-container" + exit 1 + fi +fi + echo "Generating the following TRT Models: ${MODEL_CONVERT}" # Build trt engine cd /usr/local/src/tensorrt_demos/yolo -# Download yolo weights -./download_yolo.sh $MODEL_CONVERT > /dev/null +echo "Downloading yolo weights" +./download_yolo.sh $MODEL_DOWNLOAD 2> /dev/null for model in ${MODEL_CONVERT//,/ } do - echo "Converting ${model} model" - python3 yolo_to_onnx.py -m ${model} > /dev/null - python3 onnx_to_tensorrt.py -m ${model} > /dev/null - cp ${model}.trt ${OUTPUT_FOLDER}/${model}.trt + python3 yolo_to_onnx.py -m ${model%-dla} > /dev/null + + echo -e "\nGenerating ${model}.trt. This may take a few minutes.\n"; start=$(date +%s) + if [[ $model == *-dla ]]; then + cmd="python3 onnx_to_tensorrt.py -m ${model%-dla} --dla_core 0" + else + cmd="python3 onnx_to_tensorrt.py -m ${model}" + fi + $cmd > /tmp/onnx_to_tensorrt.log || { cat /tmp/onnx_to_tensorrt.log && continue; } + + mv ${model%-dla}.trt ${OUTPUT_FOLDER}/${model}.trt; ln -s ${OUTPUT_FOLDER}/${model}.trt ${MODEL_CACHE_DIR}/${model}.trt + echo "Generated ${model}.trt in $(($(date +%s)-start)) seconds" done + +echo "Available tensorrt models:" +cd ${OUTPUT_FOLDER} && ls *.trt; diff --git a/docker/tensorrt/detector/tensorrt_libyolo.sh b/docker/tensorrt/detector/tensorrt_libyolo.sh index e6fc415e5..91b9340a9 100755 --- a/docker/tensorrt/detector/tensorrt_libyolo.sh +++ b/docker/tensorrt/detector/tensorrt_libyolo.sh @@ -8,7 +8,10 @@ SCRIPT_DIR="/usr/local/src/tensorrt_demos" git clone --depth 1 https://github.com/NateMeyer/tensorrt_demos.git -b conditional_download # Build libyolo -cd ./tensorrt_demos/plugins && make all +if [ ! 
-e /usr/local/cuda ]; then + ln -s /usr/local/cuda-* /usr/local/cuda +fi +cd ./tensorrt_demos/plugins && make all -j$(nproc) cp libyolo_layer.so /usr/local/lib/libyolo_layer.so # Store yolo scripts for later conversion diff --git a/docker/tensorrt/requirements.txt b/docker/tensorrt/requirements-amd64.txt similarity index 100% rename from docker/tensorrt/requirements.txt rename to docker/tensorrt/requirements-amd64.txt diff --git a/docker/tensorrt/requirements-arm64.txt b/docker/tensorrt/requirements-arm64.txt new file mode 100644 index 000000000..9b12dac33 --- /dev/null +++ b/docker/tensorrt/requirements-arm64.txt @@ -0,0 +1 @@ +cuda-python == 11.7; platform_machine == 'aarch64' diff --git a/docker/tensorrt/requirements-models-arm64.txt b/docker/tensorrt/requirements-models-arm64.txt new file mode 100644 index 000000000..9d4ae7b44 --- /dev/null +++ b/docker/tensorrt/requirements-models-arm64.txt @@ -0,0 +1,3 @@ +onnx == 1.9.0; platform_machine == 'aarch64' +protobuf == 3.20.3; platform_machine == 'aarch64' +numpy == 1.23.*; platform_machine == 'aarch64' diff --git a/docker/tensorrt/trt.hcl b/docker/tensorrt/trt.hcl index 589d0b73b..56e294100 100644 --- a/docker/tensorrt/trt.hcl +++ b/docker/tensorrt/trt.hcl @@ -1,19 +1,47 @@ +variable "ARCH" { + default = "amd64" +} +variable "BASE_IMAGE" { + default = null +} +variable "SLIM_BASE" { + default = null +} +variable "TRT_BASE" { + default = null +} + +target "_build_args" { + args = { + BASE_IMAGE = BASE_IMAGE, + SLIM_BASE = SLIM_BASE, + TRT_BASE = TRT_BASE + } + platforms = ["linux/${ARCH}"] +} + +target wget { + dockerfile = "docker/main/Dockerfile" + target = "wget" + inherits = ["_build_args"] +} + target deps { dockerfile = "docker/main/Dockerfile" - platforms = ["linux/amd64"] target = "deps" + inherits = ["_build_args"] } target rootfs { dockerfile = "docker/main/Dockerfile" - platforms = ["linux/amd64"] target = "rootfs" + inherits = ["_build_args"] } target wheels { dockerfile = "docker/main/Dockerfile" - platforms = ["linux/amd64"] target = "wheels" + inherits = ["_build_args"] } target devcontainer { @@ -22,27 +50,45 @@ target devcontainer { target = "devcontainer" } -target tensorrt { - dockerfile = "docker/tensorrt/Dockerfile" +target "trt-deps" { + dockerfile = "docker/tensorrt/Dockerfile.base" context = "." contexts = { deps = "target:deps", + } + inherits = ["_build_args"] +} + +target "tensorrt-base" { + dockerfile = "docker/tensorrt/Dockerfile.base" + context = "." + contexts = { + deps = "target:deps", + } + inherits = ["_build_args"] +} + +target "tensorrt" { + dockerfile = "docker/tensorrt/Dockerfile.${ARCH}" + context = "." + contexts = { + wget = "target:wget", + tensorrt-base = "target:tensorrt-base", rootfs = "target:rootfs" wheels = "target:wheels" } - platforms = ["linux/amd64"] target = "frigate-tensorrt" + inherits = ["_build_args"] } -target devcontainer-trt { - dockerfile = "docker/tensorrt/Dockerfile" +target "devcontainer-trt" { + dockerfile = "docker/tensorrt/Dockerfile.amd64" context = "." 
contexts = { - deps = "target:deps", - rootfs = "target:rootfs" - wheels = "target:wheels" + wheels = "target:wheels", + trt-deps = "target:trt-deps", devcontainer = "target:devcontainer" } platforms = ["linux/amd64"] target = "devcontainer-trt" -} \ No newline at end of file +} diff --git a/docker/tensorrt/trt.mk b/docker/tensorrt/trt.mk index bc01a83b3..0e01c1402 100644 --- a/docker/tensorrt/trt.mk +++ b/docker/tensorrt/trt.mk @@ -1,10 +1,26 @@ BOARDS += trt +JETPACK4_BASE ?= timongentzsch/l4t-ubuntu20-opencv:latest # L4T 32.7.1 JetPack 4.6.1 +JETPACK5_BASE ?= nvcr.io/nvidia/l4t-tensorrt:r8.5.2-runtime # L4T 35.3.1 JetPack 5.1.1 +X86_DGPU_ARGS := ARCH=amd64 +JETPACK4_ARGS := ARCH=arm64 BASE_IMAGE=$(JETPACK4_BASE) SLIM_BASE=$(JETPACK4_BASE) TRT_BASE=$(JETPACK4_BASE) +JETPACK5_ARGS := ARCH=arm64 BASE_IMAGE=$(JETPACK5_BASE) SLIM_BASE=$(JETPACK5_BASE) TRT_BASE=$(JETPACK5_BASE) + local-trt: version - docker buildx bake --load --file=docker/tensorrt/trt.hcl --set tensorrt.tags=frigate:latest-tensorrt tensorrt + $(X86_DGPU_ARGS) docker buildx bake --load --file=docker/tensorrt/trt.hcl --set tensorrt.tags=frigate:latest-tensorrt tensorrt + +local-trt-jp4: version + $(JETPACK4_ARGS) docker buildx bake --load --file=docker/tensorrt/trt.hcl --set tensorrt.tags=frigate:latest-tensorrt-jp4 tensorrt + +local-trt-jp5: version + $(JETPACK5_ARGS) docker buildx bake --load --file=docker/tensorrt/trt.hcl --set tensorrt.tags=frigate:latest-tensorrt-jp5 tensorrt build-trt: - docker buildx bake --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt tensorrt + $(X86_DGPU_ARGS) docker buildx bake --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt tensorrt + $(JETPACK4_ARGS) docker buildx bake --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt-jp4 tensorrt + $(JETPACK5_ARGS) docker buildx bake --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt-jp5 tensorrt push-trt: build-trt - docker buildx bake --push --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt tensorrt \ No newline at end of file + $(X86_DGPU_ARGS) docker buildx bake --push --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt tensorrt + $(JETPACK4_ARGS) docker buildx bake --push --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt-jp4 tensorrt + $(JETPACK5_ARGS) docker buildx bake --push --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt-jp5 tensorrt diff --git a/docs/docs/configuration/ffmpeg_presets.md b/docs/docs/configuration/ffmpeg_presets.md index 66747350e..65ef1a35f 100644 --- a/docs/docs/configuration/ffmpeg_presets.md +++ b/docs/docs/configuration/ffmpeg_presets.md @@ -11,16 +11,18 @@ It is highly recommended to use hwaccel presets in the config. These presets not See [the hwaccel docs](/configuration/hardware_acceleration.md) for more info on how to setup hwaccel for your GPU / iGPU. 
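+A preset is enabled by referencing its name in the `ffmpeg` config, either globally or per camera input. For
+example, using the Jetson preset added in this PR (the same snippet shown in the hardware acceleration docs):
+
+```yaml
+ffmpeg:
+  hwaccel_args: preset-jetson-h264
+```
+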
-| Preset | Usage | Other Notes |
-| --------------------- | ---------------------------- | ----------------------------------------------------- |
-| preset-rpi-32-h264 | 32 bit Rpi with h264 stream | |
-| preset-rpi-64-h264 | 64 bit Rpi with h264 stream | |
-| preset-vaapi | Intel & AMD VAAPI | Check hwaccel docs to ensure correct driver is chosen |
-| preset-intel-qsv-h264 | Intel QSV with h264 stream | If issues occur recommend using vaapi preset instead |
-| preset-intel-qsv-h265 | Intel QSV with h265 stream | If issues occur recommend using vaapi preset instead |
-| preset-nvidia-h264 | Nvidia GPU with h264 stream | |
-| preset-nvidia-h265 | Nvidia GPU with h265 stream | |
-| preset-nvidia-mjpeg | Nvidia GPU with mjpeg stream | Recommend restreaming mjpeg and using nvidia-h264 |
+| Preset | Usage | Other Notes |
+| --------------------- | ------------------------------ | ----------------------------------------------------- |
+| preset-rpi-32-h264 | 32 bit Rpi with h264 stream | |
+| preset-rpi-64-h264 | 64 bit Rpi with h264 stream | |
+| preset-vaapi | Intel & AMD VAAPI | Check hwaccel docs to ensure correct driver is chosen |
+| preset-intel-qsv-h264 | Intel QSV with h264 stream | If issues occur recommend using vaapi preset instead |
+| preset-intel-qsv-h265 | Intel QSV with h265 stream | If issues occur recommend using vaapi preset instead |
+| preset-nvidia-h264 | Nvidia GPU with h264 stream | |
+| preset-nvidia-h265 | Nvidia GPU with h265 stream | |
+| preset-nvidia-mjpeg | Nvidia GPU with mjpeg stream | Recommend restreaming mjpeg and using nvidia-h264 |
+| preset-jetson-h264 | Nvidia Jetson with h264 stream | |
+| preset-jetson-h265 | Nvidia Jetson with h265 stream | |
 
 ### Input Args Presets
diff --git a/docs/docs/configuration/hardware_acceleration.md b/docs/docs/configuration/hardware_acceleration.md
index cb042c860..3241c4b77 100644
--- a/docs/docs/configuration/hardware_acceleration.md
+++ b/docs/docs/configuration/hardware_acceleration.md
@@ -246,3 +246,77 @@ If you do not see these processes, check the `docker logs` for the container and
 These instructions were originally based on the [Jellyfin documentation](https://jellyfin.org/docs/general/administration/hardware-acceleration.html#nvidia-hardware-acceleration-on-docker-linux).
 
 # Community Supported
+
+## NVIDIA Jetson (Orin AGX, Orin NX, Orin Nano*, Xavier AGX, Xavier NX, TX2, TX1, Nano)
+
+A separate set of docker images based on Jetpack/L4T is available. These images come with an `ffmpeg` build
+with codecs that use the Jetson's dedicated media engine. If your Jetson host is running Jetpack 4.6, use the
+`frigate-tensorrt-jp4` image, or if your Jetson host is running Jetpack 5.0+, use the `frigate-tensorrt-jp5`
+image. Note that the Orin Nano has no video encoder, so Frigate will use software encoding on this platform,
+but the image will still allow hardware decoding and TensorRT object detection.
+
+You will need to use the image with the nvidia container runtime:
+
+### Docker Run CLI - Jetson
+
+```bash
+docker run -d \
+  ...
+  --runtime nvidia \
+  ghcr.io/blakeblackshear/frigate:stable-tensorrt-jp5
+```
+
+### Docker Compose - Jetson
+
+```yaml
+version: '2.4'
+services:
+  frigate:
+    ...
+    image: ghcr.io/blakeblackshear/frigate:stable-tensorrt-jp5
+    runtime: nvidia # Add this
+```
+
+:::note
+
+The `runtime:` tag is not supported on older versions of docker-compose.
If you run into this, you can instead use the nvidia runtime system-wide by adding `"default-runtime": "nvidia"` to `/etc/docker/daemon.json`: + +``` +{ + "runtimes": { + "nvidia": { + "path": "nvidia-container-runtime", + "runtimeArgs": [] + } + }, + "default-runtime": "nvidia" +} +``` + +::: + +### Setup Decoder + +The decoder you need to pass in the `hwaccel_args` will depend on the input video. + +A list of supported codecs (you can use `ffmpeg -decoders | grep nvmpi` in the container to get the ones your card supports) + +``` + V..... h264_nvmpi h264 (nvmpi) (codec h264) + V..... hevc_nvmpi hevc (nvmpi) (codec hevc) + V..... mpeg2_nvmpi mpeg2 (nvmpi) (codec mpeg2video) + V..... mpeg4_nvmpi mpeg4 (nvmpi) (codec mpeg4) + V..... vp8_nvmpi vp8 (nvmpi) (codec vp8) + V..... vp9_nvmpi vp9 (nvmpi) (codec vp9) +``` + +For example, for H264 video, you'll select `preset-jetson-h264`. + +```yaml +ffmpeg: + hwaccel_args: preset-jetson-h264 +``` + +If everything is working correctly, you should see a significant reduction in ffmpeg CPU load and power consumption. +Verify that hardware decoding is working by running `jtop` (`sudo pip3 install -U jetson-stats`), which should show +that NVDEC/NVDEC1 are in use. diff --git a/docs/docs/configuration/index.md b/docs/docs/configuration/index.md index eef00550d..d3b6e6a40 100644 --- a/docs/docs/configuration/index.md +++ b/docs/docs/configuration/index.md @@ -101,7 +101,7 @@ detectors: # Required: name of the detector detector_name: # Required: type of the detector - # Frigate provided types include 'cpu', 'edgetpu', and 'openvino' (default: shown below) + # Frigate provided types include 'cpu', 'edgetpu', 'openvino' and 'tensorrt' (default: shown below) # Additional detector types can also be plugged in. # Detectors may require additional configuration. # Refer to the Detectors configuration page for more information. @@ -414,6 +414,8 @@ snapshots: # Optional: Per object retention days objects: person: 15 + # Optional: quality of the encoded jpeg, 0-100 (default: shown below) + quality: 70 # Optional: RTMP configuration # NOTE: RTMP is deprecated in favor of restream diff --git a/docs/docs/configuration/object_detectors.md b/docs/docs/configuration/object_detectors.md index 93c439b64..51a25702b 100644 --- a/docs/docs/configuration/object_detectors.md +++ b/docs/docs/configuration/object_detectors.md @@ -196,7 +196,9 @@ The model used for TensorRT must be preprocessed on the same hardware platform t The Frigate image will generate model files during startup if the specified model is not found. Processed models are stored in the `/config/model_cache` folder. Typically the `/config` path is mapped to a directory on the host already and the `model_cache` does not need to be mapped separately unless the user wants to store it in a different location on the host. -To by default, the `yolov7-tiny-416` model will be generated, but this can be overridden by specifying the `YOLO_MODELS` environment variable in Docker. One or more models may be listed in a comma-separated format, and each one will be generated. To select no model generation, set the variable to an empty string, `YOLO_MODELS=""`. Models will only be generated if the corresponding `{model}.trt` file is not present in the `model_cache` folder, so you can force a model to be regenerated by deleting it from your Frigate data folder. +By default, the `yolov7-320` model will be generated, but this can be overridden by specifying the `YOLO_MODELS` environment variable in Docker. 
One or more models may be listed in a comma-separated format, and each one will be generated. To select no model generation, set the variable to an empty string, `YOLO_MODELS=""`. Models will only be generated if the corresponding `{model}.trt` file is not present in the `model_cache` folder, so you can force a model to be regenerated by deleting it from your Frigate data folder. + +If you have a Jetson device with DLAs (Xavier or Orin), you can generate a model that will run on the DLA by appending `-dla` to your model name, e.g. specify `YOLO_MODELS=yolov7-320-dla`. The model will run on DLA0 (Frigate does not currently support DLA1). DLA-incompatible layers will fall back to running on the GPU. If your GPU does not support FP16 operations, you can pass the environment variable `USE_FP16=False` to disable it. @@ -252,11 +254,11 @@ detectors: device: 0 #This is the default, select the first GPU model: - path: /config/model_cache/tensorrt/yolov7-tiny-416.trt + path: /config/model_cache/tensorrt/yolov7-320.trt input_tensor: nchw input_pixel_format: rgb - width: 416 - height: 416 + width: 320 + height: 320 ``` ## Deepstack / CodeProject.AI Server Detector diff --git a/docs/docs/development/contributing.md b/docs/docs/development/contributing.md index 3045546c6..b98ceb035 100644 --- a/docs/docs/development/contributing.md +++ b/docs/docs/development/contributing.md @@ -101,12 +101,18 @@ This should show <50% CPU in top, and ~80% CPU without `-c:v h264_v4l2m2m`. ffmpeg -c:v h264_v4l2m2m -re -stream_loop -1 -i https://streams.videolan.org/ffmpeg/incoming/720p60.mp4 -f rawvideo -pix_fmt yuv420p pipe: > /dev/null ``` -**NVIDIA** +**NVIDIA GPU** ```shell ffmpeg -c:v h264_cuvid -re -stream_loop -1 -i https://streams.videolan.org/ffmpeg/incoming/720p60.mp4 -f rawvideo -pix_fmt yuv420p pipe: > /dev/null ``` +**NVIDIA Jetson** + +```shell +ffmpeg -c:v h264_nvmpi -re -stream_loop -1 -i https://streams.videolan.org/ffmpeg/incoming/720p60.mp4 -f rawvideo -pix_fmt yuv420p pipe: > /dev/null +``` + **VAAPI** ```shell diff --git a/docs/docs/frigate/hardware.md b/docs/docs/frigate/hardware.md index 5daf8fe3b..eb0a5eb61 100644 --- a/docs/docs/frigate/hardware.md +++ b/docs/docs/frigate/hardware.md @@ -70,7 +70,7 @@ Inference speeds vary greatly depending on the CPU, GPU, or VPU used, some known | Intel i5 1135G7 | 10 - 15 ms | | | Intel i5 12600K | ~ 15 ms | Inference speeds on CPU were ~ 35 ms | -### TensorRT +### TensorRT - Nvidia GPU The TensortRT detector is able to run on x86 hosts that have an Nvidia GPU which supports the 12.x series of CUDA libraries. The minimum driver version on the host system must be `>=525.60.13`. Also the GPU must support a Compute Capability of `5.0` or greater. This generally correlates to a Maxwell-era GPU or newer, check the [TensorRT docs for more info](/configuration/object_detectors#nvidia-tensorrt-detector). @@ -87,6 +87,14 @@ Inference speeds will vary greatly depending on the GPU and the model used. | Quadro P400 2GB | 20 - 25 ms | | Quadro P2000 | ~ 12 ms | +### Community Supported: + +#### Nvidia Jetson + +Frigate supports all Jetson boards, from the inexpensive Jetson Nano to the powerful Jetson Orin AGX. 
It will [make use of the Jetson's hardware media engine](/configuration/hardware_acceleration#nvidia-jetson-orin-agx-orin-nx-orin-nano-xavier-agx-xavier-nx-tx2-tx1-nano) when configured with the [appropriate presets](/configuration/ffmpeg_presets#hwaccel-presets), and will make use of the Jetson's GPU and DLA for object detection when configured with the [TensorRT detector](/configuration/object_detectors#nvidia-tensorrt-detector).
+
+Inference speed will vary depending on the YOLO model, Jetson platform and Jetson nvpmodel (GPU/DLA/EMC clock speed). It is typically 20-40 ms for most models. The DLA is more efficient than the GPU, but not faster, so using the DLA will reduce power consumption but will slightly increase inference time.
+
 ## What does Frigate use the CPU for and what does it use a detector for? (ELI5 Version)
 
 This is taken from a [user question on reddit](https://www.reddit.com/r/homeassistant/comments/q8mgau/comment/hgqbxh5/?utm_source=share&utm_medium=web2x&context=3). Modified slightly for clarity.
diff --git a/docs/docs/frigate/installation.md b/docs/docs/frigate/installation.md
index b8f33da77..9d334f43d 100644
--- a/docs/docs/frigate/installation.md
+++ b/docs/docs/frigate/installation.md
@@ -93,6 +93,9 @@ The following officially supported builds are available:
 
 The following community supported builds are available:
 
+`ghcr.io/blakeblackshear/frigate:stable-tensorrt-jp5` - Frigate build optimized for nvidia Jetson devices running Jetpack 5
+`ghcr.io/blakeblackshear/frigate:stable-tensorrt-jp4` - Frigate build optimized for nvidia Jetson devices running Jetpack 4.6
+
 :::
 
 ```yaml
diff --git a/frigate/config.py b/frigate/config.py
index 59e086989..101775f6d 100644
--- a/frigate/config.py
+++ b/frigate/config.py
@@ -804,9 +804,19 @@ class CameraConfig(FrigateBaseModel):
             ffmpeg_input.global_args or self.ffmpeg.global_args
         )
         hwaccel_args = get_ffmpeg_arg_list(
-            parse_preset_hardware_acceleration_decode(ffmpeg_input.hwaccel_args)
+            parse_preset_hardware_acceleration_decode(
+                ffmpeg_input.hwaccel_args,
+                self.detect.fps,
+                self.detect.width,
+                self.detect.height,
+            )
             or ffmpeg_input.hwaccel_args
-            or parse_preset_hardware_acceleration_decode(self.ffmpeg.hwaccel_args)
+            or parse_preset_hardware_acceleration_decode(
+                self.ffmpeg.hwaccel_args,
+                self.detect.fps,
+                self.detect.width,
+                self.detect.height,
+            )
             or self.ffmpeg.hwaccel_args
         )
         input_args = get_ffmpeg_arg_list(
diff --git a/frigate/ffmpeg_presets.py b/frigate/ffmpeg_presets.py
index 43d2504bd..571781551 100644
--- a/frigate/ffmpeg_presets.py
+++ b/frigate/ffmpeg_presets.py
@@ -55,58 +55,16 @@ _user_agent_args = [
 ]
 
 PRESETS_HW_ACCEL_DECODE = {
-    "preset-rpi-32-h264": ["-c:v:1", "h264_v4l2m2m"],
-    "preset-rpi-64-h264": ["-c:v:1", "h264_v4l2m2m"],
-    "preset-vaapi": [
-        "-hwaccel_flags",
-        "allow_profile_mismatch",
-        "-hwaccel",
-        "vaapi",
-        "-hwaccel_device",
-        _gpu_selector.get_selected_gpu(),
-        "-hwaccel_output_format",
-        "vaapi",
-    ],
-    "preset-intel-qsv-h264": [
-        "-hwaccel",
-        "qsv",
-        "-qsv_device",
-        _gpu_selector.get_selected_gpu(),
-        "-hwaccel_output_format",
-        "qsv",
-        "-c:v",
-        "h264_qsv",
-    ],
-    "preset-intel-qsv-h265": [
-        "-load_plugin",
-        "hevc_hw",
-        "-hwaccel",
-        "qsv",
-        "-qsv_device",
-        _gpu_selector.get_selected_gpu(),
-        "-hwaccel_output_format",
-        "qsv",
-        "-c:v",
-        "hevc_qsv",
-    ],
-    "preset-nvidia-h264": [
-        "-hwaccel",
-        "cuda",
-        "-hwaccel_output_format",
-        "cuda",
-    ],
-    "preset-nvidia-h265": [
-        "-hwaccel",
-        "cuda",
-        "-hwaccel_output_format",
-        "cuda",
-    ],
-    "preset-nvidia-mjpeg": [
-        "-hwaccel",
-        "cuda",
-        "-hwaccel_output_format",
-        "cuda",
-    ],
+    "preset-rpi-32-h264": "-c:v:1 h264_v4l2m2m",
+    "preset-rpi-64-h264": "-c:v:1 h264_v4l2m2m",
+    "preset-vaapi": f"-hwaccel_flags allow_profile_mismatch -hwaccel vaapi -hwaccel_device {_gpu_selector.get_selected_gpu()} -hwaccel_output_format vaapi",
+    "preset-intel-qsv-h264": f"-hwaccel qsv -qsv_device {_gpu_selector.get_selected_gpu()} -hwaccel_output_format qsv -c:v h264_qsv",
+    "preset-intel-qsv-h265": f"-load_plugin hevc_hw -hwaccel qsv -qsv_device {_gpu_selector.get_selected_gpu()} -hwaccel_output_format qsv -c:v hevc_qsv",
+    "preset-nvidia-h264": "-hwaccel cuda -hwaccel_output_format cuda",
+    "preset-nvidia-h265": "-hwaccel cuda -hwaccel_output_format cuda",
+    "preset-nvidia-mjpeg": "-hwaccel cuda -hwaccel_output_format cuda",
+    "preset-jetson-h264": "-c:v h264_nvmpi -resize {1}x{2}",
+    "preset-jetson-h265": "-c:v hevc_nvmpi -resize {1}x{2}",
 }
 
 PRESETS_HW_ACCEL_SCALE = {
@@ -117,6 +75,8 @@ PRESETS_HW_ACCEL_SCALE = {
     "preset-intel-qsv-h265": "-r {0} -vf vpp_qsv=framerate={0}:w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p",
     "preset-nvidia-h264": "-r {0} -vf fps={0},scale_cuda=w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p",
     "preset-nvidia-h265": "-r {0} -vf fps={0},scale_cuda=w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p",
+    "preset-jetson-h264": "-r {0}", # scaled in decoder
+    "preset-jetson-h265": "-r {0}", # scaled in decoder
     "default": "-r {0} -vf fps={0},scale={1}:{2}",
 }
 
@@ -128,6 +88,8 @@ PRESETS_HW_ACCEL_ENCODE_BIRDSEYE = {
     "preset-intel-qsv-h265": "ffmpeg -hide_banner {0} -c:v h264_qsv -g 50 -bf 0 -profile:v high -level:v 4.1 -async_depth:v 1 {1}",
     "preset-nvidia-h264": "ffmpeg -hide_banner {0} -c:v h264_nvenc -g 50 -profile:v high -level:v auto -preset:v p2 -tune:v ll {1}",
     "preset-nvidia-h265": "ffmpeg -hide_banner {0} -c:v h264_nvenc -g 50 -profile:v high -level:v auto -preset:v p2 -tune:v ll {1}",
+    "preset-jetson-h264": "ffmpeg -hide_banner {0} -c:v h264_nvmpi -profile high {1}",
+    "preset-jetson-h265": "ffmpeg -hide_banner {0} -c:v h264_nvmpi -profile high {1}",
     "default": "ffmpeg -hide_banner {0} -c:v libx264 -g 50 -profile:v high -level:v 4.1 -preset:v superfast -tune:v zerolatency {1}",
 }
 
@@ -139,16 +101,28 @@ PRESETS_HW_ACCEL_ENCODE_TIMELAPSE = {
     "preset-intel-qsv-h265": "ffmpeg -hide_banner {0} -c:v hevc_qsv -g 50 -bf 0 -profile:v high -level:v 4.1 -async_depth:v 1 {1}",
     "preset-nvidia-h264": "ffmpeg -hide_banner -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 8 {0} -c:v h264_nvenc {1}",
     "preset-nvidia-h265": "ffmpeg -hide_banner -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 8 {0} -c:v hevc_nvenc {1}",
+    "preset-jetson-h264": "ffmpeg -hide_banner {0} -c:v h264_nvmpi -profile high {1}",
+    "preset-jetson-h265": "ffmpeg -hide_banner {0} -c:v hevc_nvmpi -profile high {1}",
     "default": "ffmpeg -hide_banner {0} -c:v libx264 -preset:v ultrafast -tune:v zerolatency {1}",
 }
 
 
-def parse_preset_hardware_acceleration_decode(arg: Any) -> list[str]:
+def parse_preset_hardware_acceleration_decode(
+    arg: Any,
+    fps: int,
+    width: int,
+    height: int,
+) -> list[str]:
     """Return the correct preset if in preset format otherwise return None."""
     if not isinstance(arg, str):
         return None
 
-    return PRESETS_HW_ACCEL_DECODE.get(arg, None)
+    decode = PRESETS_HW_ACCEL_DECODE.get(arg, None)
+
+    if not decode:
+        return None
+
+    return decode.format(fps, width, height).split(" ")
 
 
 def parse_preset_hardware_acceleration_scale(
@@ -160,20 +134,13 @@ def 
parse_preset_hardware_acceleration_scale( ) -> list[str]: """Return the correct scaling preset or default preset if none is set.""" if not isinstance(arg, str) or " " in arg: - scale = PRESETS_HW_ACCEL_SCALE["default"].format(fps, width, height).split(" ") - scale.extend(detect_args) - return scale - - scale = PRESETS_HW_ACCEL_SCALE.get(arg, "") - - if scale: - scale = scale.format(fps, width, height).split(" ") - scale.extend(detect_args) - return scale + scale = PRESETS_HW_ACCEL_SCALE["default"] else: - scale = scale.format(fps, width, height).split(" ") - scale.extend(detect_args) - return scale + scale = PRESETS_HW_ACCEL_SCALE.get(arg, "") + + scale = scale.format(fps, width, height).split(" ") + scale.extend(detect_args) + return scale class EncodeTypeEnum(str, Enum): @@ -193,6 +160,10 @@ def parse_preset_hardware_acceleration_encode( if not isinstance(arg, str): return arg_map["default"].format(input, output) + # Not all jetsons have HW encoders, so fall back to default SW encoder if not + if arg.startswith("preset-jetson-") and not os.path.exists("/dev/nvhost-msenc"): + arg = "default" + return arg_map.get(arg, arg_map["default"]).format( input, output, diff --git a/frigate/stats.py b/frigate/stats.py index 5fdc671ee..da5eb358e 100644 --- a/frigate/stats.py +++ b/frigate/stats.py @@ -22,6 +22,7 @@ from frigate.util.services import ( get_bandwidth_stats, get_cpu_stats, get_intel_gpu_stats, + get_jetson_stats, get_nvidia_gpu_stats, ) from frigate.version import VERSION @@ -180,6 +181,15 @@ async def set_gpu_stats( else: stats["nvidia-gpu"] = {"gpu": -1, "mem": -1} hwaccel_errors.append(args) + elif "nvmpi" in args or "jetson" in args: + # nvidia Jetson + jetson_usage = get_jetson_stats() + + if jetson_usage: + stats["jetson-gpu"] = jetson_usage + else: + stats["jetson-gpu"] = {"gpu": -1, "mem": -1} + hwaccel_errors.append(args) elif "qsv" in args: if not config.telemetry.stats.intel_gpu_stats: continue diff --git a/frigate/util/services.py b/frigate/util/services.py index 507ee76ea..3c591feb9 100644 --- a/frigate/util/services.py +++ b/frigate/util/services.py @@ -309,6 +309,21 @@ def get_nvidia_gpu_stats() -> dict[int, dict]: return results +def get_jetson_stats() -> dict[int, dict]: + results = {} + + try: + results["mem"] = "-" # no discrete gpu memory + + with open("/sys/devices/gpu.0/load", "r") as f: + gpuload = float(f.readline()) / 10 + results["gpu"] = f"{gpuload}%" + except Exception: + return None + + return results + + def ffprobe_stream(path: str) -> sp.CompletedProcess: """Run ffprobe on stream.""" clean_path = escape_special_characters(path)
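
As an illustration of how the reworked string-based decode presets above are consumed at config-load time, here is a minimal sketch (it assumes the `frigate.ffmpeg_presets` module layout and the new `parse_preset_hardware_acceleration_decode(arg, fps, width, height)` signature introduced in this patch; it is not part of the change itself):

```python
# Sketch: expanding a Jetson decode preset into ffmpeg arguments.
# The preset string "-c:v h264_nvmpi -resize {1}x{2}" is formatted with
# (fps, width, height) and split on spaces, so scaling to the detect
# resolution is done by the decoder itself rather than a scale filter.
from frigate.ffmpeg_presets import parse_preset_hardware_acceleration_decode

args = parse_preset_hardware_acceleration_decode("preset-jetson-h264", 5, 1280, 720)
print(args)
# ['-c:v', 'h264_nvmpi', '-resize', '1280x720']
```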