Nvidia Jetson ffmpeg + TensorRT support (#6458)

* Non-Jetson changes

Required for later commits:
- Allow base image to be overridden (and don't assume its WORKDIR)
- Ensure python3.9
- Map hwaccel decode presets as strings instead of lists
Not required:
- Fix existing documentation
- Simplify hwaccel scale logic

* Prepare for multi-arch tensorrt build

* Add tensorrt images for Jetson boards

* Add Jetson ffmpeg hwaccel

* Update docs

* Add CODEOWNERS

* CI

* Change default model from yolov7-tiny-416 to yolov7-320

In my experience the tiny models perform markedly worse without being
much faster

* fixup! Update docs
This commit is contained in:
Andrew Reiter 2023-07-26 06:50:41 -04:00 committed by GitHub
parent 680198148b
commit a96a951e23
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
28 changed files with 567 additions and 139 deletions

View File

@ -79,7 +79,7 @@ jobs:
set: | set: |
rpi.tags=ghcr.io/${{ steps.lowercaseRepo.outputs.lowercase }}:${{ github.ref_name }}-${{ env.SHORT_SHA }}-rpi rpi.tags=ghcr.io/${{ steps.lowercaseRepo.outputs.lowercase }}:${{ github.ref_name }}-${{ env.SHORT_SHA }}-rpi
*.cache-from=type=gha *.cache-from=type=gha
- name: Build and push TensorRT - name: Build and push TensorRT (x86 GPU)
uses: docker/bake-action@v3 uses: docker/bake-action@v3
with: with:
push: true push: true
@ -88,6 +88,34 @@ jobs:
set: | set: |
tensorrt.tags=ghcr.io/${{ steps.lowercaseRepo.outputs.lowercase }}:${{ github.ref_name }}-${{ env.SHORT_SHA }}-tensorrt tensorrt.tags=ghcr.io/${{ steps.lowercaseRepo.outputs.lowercase }}:${{ github.ref_name }}-${{ env.SHORT_SHA }}-tensorrt
*.cache-from=type=gha *.cache-from=type=gha
- name: Build and push TensorRT (Jetson, Jetpack 4)
env:
ARCH: arm64
BASE_IMAGE: timongentzsch/l4t-ubuntu20-opencv:latest
SLIM_BASE: timongentzsch/l4t-ubuntu20-opencv:latest
TRT_BASE: timongentzsch/l4t-ubuntu20-opencv:latest
uses: docker/bake-action@v3
with:
push: true
targets: tensorrt
files: docker/tensorrt/trt.hcl
set: |
tensorrt.tags=ghcr.io/${{ steps.lowercaseRepo.outputs.lowercase }}:${{ github.ref_name }}-${{ env.SHORT_SHA }}-tensorrt-jp4
*.cache-from=type=gha
- name: Build and push TensorRT (Jetson, Jetpack 5)
env:
ARCH: arm64
BASE_IMAGE: nvcr.io/nvidia/l4t-tensorrt:r8.5.2-runtime
SLIM_BASE: nvcr.io/nvidia/l4t-tensorrt:r8.5.2-runtime
TRT_BASE: nvcr.io/nvidia/l4t-tensorrt:r8.5.2-runtime
uses: docker/bake-action@v3
with:
push: true
targets: tensorrt
files: docker/tensorrt/trt.hcl
set: |
tensorrt.tags=ghcr.io/${{ steps.lowercaseRepo.outputs.lowercase }}:${{ github.ref_name }}-${{ env.SHORT_SHA }}-tensorrt-jp5
*.cache-from=type=gha
- name: Assemble and push default build - name: Assemble and push default build
uses: int128/docker-manifest-create-action@v1 uses: int128/docker-manifest-create-action@v1
with: with:

2
CODEOWNERS Normal file
View File

@ -0,0 +1,2 @@
# Community-supported boards
/docker/tensorrt/ @madsciencetist @NateMeyer

View File

@ -21,6 +21,8 @@ services:
- driver: nvidia - driver: nvidia
count: 1 count: 1
capabilities: [gpu] capabilities: [gpu]
environment:
YOLO_MODELS: yolov7-320
devices: devices:
- /dev/bus/usb:/dev/bus/usb - /dev/bus/usb:/dev/bus/usb
# - /dev/dri:/dev/dri # for intel hwaccel, needs to be updated for your hardware # - /dev/dri:/dev/dri # for intel hwaccel, needs to be updated for your hardware
@ -30,8 +32,6 @@ services:
- /etc/localtime:/etc/localtime:ro - /etc/localtime:/etc/localtime:ro
- ./config:/config - ./config:/config
- ./debug:/media/frigate - ./debug:/media/frigate
# Create the trt-models folder using the documented method of generating TRT models
# - ./debug/trt-models:/trt-models
- /dev/bus/usb:/dev/bus/usb - /dev/bus/usb:/dev/bus/usb
mqtt: mqtt:
container_name: mqtt container_name: mqtt

View File

@ -3,11 +3,14 @@
# https://askubuntu.com/questions/972516/debian-frontend-environment-variable # https://askubuntu.com/questions/972516/debian-frontend-environment-variable
ARG DEBIAN_FRONTEND=noninteractive ARG DEBIAN_FRONTEND=noninteractive
FROM debian:11 AS base ARG BASE_IMAGE=debian:11
ARG SLIM_BASE=debian:11-slim
FROM ${BASE_IMAGE} AS base
FROM --platform=linux/amd64 debian:11 AS base_amd64 FROM --platform=linux/amd64 debian:11 AS base_amd64
FROM debian:11-slim AS slim-base FROM ${SLIM_BASE} AS slim-base
FROM slim-base AS wget FROM slim-base AS wget
ARG DEBIAN_FRONTEND ARG DEBIAN_FRONTEND
@ -123,8 +126,8 @@ RUN apt-get -qq update \
&& echo "deb http://deb.debian.org/debian bullseye main contrib non-free" | tee /etc/apt/sources.list.d/raspi.list \ && echo "deb http://deb.debian.org/debian bullseye main contrib non-free" | tee /etc/apt/sources.list.d/raspi.list \
&& apt-get -qq update \ && apt-get -qq update \
&& apt-get -qq install -y \ && apt-get -qq install -y \
python3 \ python3.9 \
python3-dev \ python3.9-dev \
wget \ wget \
# opencv dependencies # opencv dependencies
build-essential cmake git pkg-config libgtk-3-dev \ build-essential cmake git pkg-config libgtk-3-dev \
@ -137,14 +140,17 @@ RUN apt-get -qq update \
gcc gfortran libopenblas-dev liblapack-dev && \ gcc gfortran libopenblas-dev liblapack-dev && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
# Ensure python3 defaults to python3.9
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1
RUN wget -q https://bootstrap.pypa.io/get-pip.py -O get-pip.py \ RUN wget -q https://bootstrap.pypa.io/get-pip.py -O get-pip.py \
&& python3 get-pip.py "pip" && python3 get-pip.py "pip"
COPY docker/main/requirements.txt /requirements.txt COPY docker/main/requirements.txt /requirements.txt
RUN pip3 install -r requirements.txt RUN pip3 install -r /requirements.txt
COPY docker/main/requirements-wheels.txt /requirements-wheels.txt COPY docker/main/requirements-wheels.txt /requirements-wheels.txt
RUN pip3 wheel --wheel-dir=/wheels -r requirements-wheels.txt RUN pip3 wheel --wheel-dir=/wheels -r /requirements-wheels.txt
# Collect deps in a single layer # Collect deps in a single layer
@ -176,6 +182,7 @@ RUN --mount=type=bind,source=docker/main/install_deps.sh,target=/deps/install_de
/deps/install_deps.sh /deps/install_deps.sh
RUN --mount=type=bind,from=wheels,source=/wheels,target=/deps/wheels \ RUN --mount=type=bind,from=wheels,source=/wheels,target=/deps/wheels \
python3 -m pip install --upgrade pip && \
pip3 install -U /deps/wheels/*.whl pip3 install -U /deps/wheels/*.whl
COPY --from=deps-rootfs / / COPY --from=deps-rootfs / /

View File

@ -10,11 +10,15 @@ apt-get -qq install --no-install-recommends -y \
wget \ wget \
procps vainfo \ procps vainfo \
unzip locales tzdata libxml2 xz-utils \ unzip locales tzdata libxml2 xz-utils \
python3.9 \
python3-pip \ python3-pip \
curl \ curl \
jq \ jq \
nethogs nethogs
# ensure python3 defaults to python3.9
update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1
mkdir -p -m 600 /root/.gnupg mkdir -p -m 600 /root/.gnupg
# add coral repo # add coral repo
@ -23,8 +27,10 @@ curl -fsSLo - https://packages.cloud.google.com/apt/doc/apt-key.gpg | \
echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | tee /etc/apt/sources.list.d/coral-edgetpu.list echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | tee /etc/apt/sources.list.d/coral-edgetpu.list
echo "libedgetpu1-max libedgetpu/accepted-eula select true" | debconf-set-selections echo "libedgetpu1-max libedgetpu/accepted-eula select true" | debconf-set-selections
# enable non-free repo # enable non-free repo in Debian
sed -i -e's/ main/ main contrib non-free/g' /etc/apt/sources.list if grep -q "Debian" /etc/issue; then
sed -i -e's/ main/ main contrib non-free/g' /etc/apt/sources.list
fi
# coral drivers # coral drivers
apt-get -qq update apt-get -qq update

View File

@ -9,28 +9,11 @@ ARG DEBIAN_FRONTEND
ARG TARGETARCH ARG TARGETARCH
# Add TensorRT wheels to another folder # Add TensorRT wheels to another folder
COPY docker/tensorrt/requirements.txt /requirements-tensorrt.txt COPY docker/tensorrt/requirements-amd64.txt /requirements-tensorrt.txt
RUN mkdir -p /trt-wheels && pip3 wheel --wheel-dir=/trt-wheels -r /requirements-tensorrt.txt RUN mkdir -p /trt-wheels && pip3 wheel --wheel-dir=/trt-wheels -r /requirements-tensorrt.txt
# Build TensorRT-specific library FROM tensorrt-base AS frigate-tensorrt
FROM nvcr.io/nvidia/tensorrt:23.03-py3 AS trt-deps
RUN --mount=type=bind,source=docker/tensorrt/detector/tensorrt_libyolo.sh,target=/tensorrt_libyolo.sh \
/tensorrt_libyolo.sh
# Frigate w/ TensorRT Support as separate image
FROM deps AS frigate-tensorrt
#Disable S6 Global timeout
ENV S6_CMD_WAIT_FOR_SERVICES_MAXTIME=0
ENV TRT_VER=8.5.3 ENV TRT_VER=8.5.3
ENV YOLO_MODELS="yolov7-tiny-416"
COPY --from=trt-deps /usr/local/lib/libyolo_layer.so /usr/local/lib/libyolo_layer.so
COPY --from=trt-deps /usr/local/src/tensorrt_demos /usr/local/src/tensorrt_demos
COPY docker/tensorrt/detector/rootfs/ /
RUN --mount=type=bind,from=trt-wheels,source=/trt-wheels,target=/deps/trt-wheels \ RUN --mount=type=bind,from=trt-wheels,source=/trt-wheels,target=/deps/trt-wheels \
pip3 install -U /deps/trt-wheels/*.whl && \ pip3 install -U /deps/trt-wheels/*.whl && \
ldconfig ldconfig

View File

@ -0,0 +1,79 @@
# syntax=docker/dockerfile:1.4

# Frigate TensorRT image for NVIDIA Jetson (arm64) boards.
# Stages defined here: build-wheels, trt-wheels, trt-model-wheels,
# jetson-ffmpeg and the final frigate-tensorrt. The `wget`, `tensorrt-base`
# and `rootfs` stages are not defined in this file; they are supplied as
# named build contexts by docker/tensorrt/trt.hcl.

# https://askubuntu.com/questions/972516/debian-frontend-environment-variable
ARG DEBIAN_FRONTEND=noninteractive

# Base image for the wheel-building stages. It has no default here, so it
# must be passed as a build arg (trt.hcl forwards BASE_IMAGE).
ARG BASE_IMAGE
FROM ${BASE_IMAGE} AS build-wheels
ARG DEBIAN_FRONTEND

# Use a separate container to build wheels to prevent build dependencies in final image
RUN apt-get -qq update \
    && apt-get -qq install -y --no-install-recommends \
    python3.9 python3.9-dev \
    wget build-essential cmake git \
    && rm -rf /var/lib/apt/lists/*

# Ensure python3 defaults to python3.9
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1

RUN wget -q https://bootstrap.pypa.io/get-pip.py -O get-pip.py \
    && python3 get-pip.py "pip"

FROM build-wheels AS trt-wheels
ARG DEBIAN_FRONTEND
ARG TARGETARCH

# python-tensorrt build deps are 3.4 GB!
# The symlink gives the build a stable /usr/local/cuda path when the image
# only ships a versioned /usr/local/cuda-* directory.
RUN apt-get update \
    && apt-get install -y ccache cuda-cudart-dev-* cuda-nvcc-* libnvonnxparsers-dev libnvparsers-dev libnvinfer-plugin-dev \
    && ([ -e /usr/local/cuda ] || ln -s /usr/local/cuda-* /usr/local/cuda) \
    && rm -rf /var/lib/apt/lists/*

# Determine version of tensorrt already installed in base image, e.g. "Version: 8.4.1-1+cuda11.4"
# Only the leading MAJOR.MINOR.PATCH part is written to /etc/TENSORRT_VER.
RUN NVINFER_VER=$(dpkg -s libnvinfer8 | grep -Po "Version: \K.*") \
    && echo "$NVINFER_VER" | grep -Po "^\d+\.\d+\.\d+" > /etc/TENSORRT_VER

# Build the python-tensorrt wheel; ccache is put first on PATH and backed by
# a cache mount so rebuilds can reuse compiled objects.
RUN --mount=type=bind,source=docker/tensorrt/detector/build_python_tensorrt.sh,target=/deps/build_python_tensorrt.sh \
    --mount=type=cache,target=/root/.ccache \
    export PATH="/usr/lib/ccache:$PATH" CCACHE_DIR=/root/.ccache CCACHE_MAXSIZE=2G \
    && TENSORRT_VER=$(cat /etc/TENSORRT_VER) /deps/build_python_tensorrt.sh

COPY docker/tensorrt/requirements-arm64.txt /requirements-tensorrt.txt
RUN pip3 wheel --wheel-dir=/trt-wheels -r /requirements-tensorrt.txt

FROM build-wheels AS trt-model-wheels
ARG DEBIAN_FRONTEND

RUN apt-get update \
    && apt-get install -y protobuf-compiler libprotobuf-dev \
    && rm -rf /var/lib/apt/lists/*

RUN --mount=type=bind,source=docker/tensorrt/requirements-models-arm64.txt,target=/requirements-tensorrt-models.txt \
    pip3 wheel --wheel-dir=/trt-model-wheels -r /requirements-tensorrt-models.txt

FROM wget AS jetson-ffmpeg
ARG DEBIAN_FRONTEND

# Use the key=value form of ENV; the whitespace-separated form is legacy
# syntax that the Dockerfile reference discourages.
ENV CCACHE_DIR=/root/.ccache
ENV CCACHE_MAXSIZE=2G
RUN --mount=type=bind,source=docker/tensorrt/build_jetson_ffmpeg.sh,target=/deps/build_jetson_ffmpeg.sh \
    --mount=type=cache,target=/root/.ccache \
    /deps/build_jetson_ffmpeg.sh

# Frigate w/ TensorRT for NVIDIA Jetson platforms
FROM tensorrt-base AS frigate-tensorrt
RUN apt-get update \
    && apt-get install -y python-is-python3 libprotobuf17 \
    && rm -rf /var/lib/apt/lists/*

# Remove the btbn ffmpeg directory and install the Jetson ffmpeg build
# (staged under /rootfs by the jetson-ffmpeg stage) in its place.
RUN rm -rf /usr/lib/btbn-ffmpeg/
COPY --from=jetson-ffmpeg /rootfs /

# The model generation script reads this file when TRT_VER is not set.
COPY --from=trt-wheels /etc/TENSORRT_VER /etc/TENSORRT_VER

RUN --mount=type=bind,from=trt-wheels,source=/trt-wheels,target=/deps/trt-wheels \
    --mount=type=bind,from=trt-model-wheels,source=/trt-model-wheels,target=/deps/trt-model-wheels \
    pip3 install -U /deps/trt-wheels/*.whl /deps/trt-model-wheels/*.whl \
    && ldconfig

WORKDIR /opt/frigate/
COPY --from=rootfs / /

View File

@ -0,0 +1,26 @@
# syntax=docker/dockerfile:1.4
# Shared TensorRT base stages: `trt-deps` builds the libyolo plugin and
# `tensorrt-base` layers it on top of the `deps` stage, which is supplied as a
# named build context by docker/tensorrt/trt.hcl.
# https://askubuntu.com/questions/972516/debian-frontend-environment-variable
ARG DEBIAN_FRONTEND=noninteractive
# Image used to build the TensorRT plugin; overridable via the TRT_BASE build
# arg (the Jetson builds pass an L4T image instead of this x86 default).
ARG TRT_BASE=nvcr.io/nvidia/tensorrt:23.03-py3
# Build TensorRT-specific library
FROM ${TRT_BASE} AS trt-deps
# Toolchain plus CUDA / TensorRT development packages needed by the build script below.
RUN apt-get update \
&& apt-get install -y git build-essential cuda-nvcc-* cuda-nvtx-* libnvinfer-dev libnvinfer-plugin-dev libnvparsers-dev libnvonnxparsers-dev \
&& rm -rf /var/lib/apt/lists/*
# The script is bind-mounted for this step only, so it does not end up in an image layer.
RUN --mount=type=bind,source=docker/tensorrt/detector/tensorrt_libyolo.sh,target=/tensorrt_libyolo.sh \
/tensorrt_libyolo.sh
# Frigate w/ TensorRT Support as separate image
FROM deps AS tensorrt-base
#Disable S6 Global timeout
ENV S6_CMD_WAIT_FOR_SERVICES_MAXTIME=0
# Bring over the compiled plugin and the tensorrt_demos scripts from the build stage.
COPY --from=trt-deps /usr/local/lib/libyolo_layer.so /usr/local/lib/libyolo_layer.so
COPY --from=trt-deps /usr/local/src/tensorrt_demos /usr/local/src/tensorrt_demos
COPY docker/tensorrt/detector/rootfs/ /
# Default model(s) to generate; a comma-separated list may be given at runtime to override it.
ENV YOLO_MODELS="yolov7-320"

View File

@ -0,0 +1,59 @@
#!/bin/bash

# For jetson platforms, build ffmpeg with custom patches. NVIDIA supplies a deb
# with accelerated decoding, but it doesn't have accelerated scaling or encoding
#
# Everything is installed under /rootfs/usr/local so that the calling
# Dockerfile stage can copy /rootfs into the final image.
# Steps: libnvmpi (jetson-ffmpeg) -> nv-codec-headers -> ffmpeg 6.0 + nvmpi patch.

set -euxo pipefail

INSTALL_PREFIX=/rootfs/usr/local

apt-get -qq update
apt-get -qq install -y --no-install-recommends build-essential ccache clang cmake pkg-config
apt-get -qq install -y --no-install-recommends libx264-dev libx265-dev

pushd /tmp

# Install libnvmpi to enable nvmpi decoders (h264_nvmpi, hevc_nvmpi)
if [ -e /usr/local/cuda-10.2 ]; then
    # assume Jetpack 4.X
    wget -q https://developer.nvidia.com/embedded/L4T/r32_Release_v5.0/T186/Jetson_Multimedia_API_R32.5.0_aarch64.tbz2 -O jetson_multimedia_api.tbz2
else
    # assume Jetpack 5.X
    wget -q https://developer.nvidia.com/downloads/embedded/l4t/r35_release_v3.1/release/jetson_multimedia_api_r35.3.1_aarch64.tbz2 -O jetson_multimedia_api.tbz2
fi
tar xaf jetson_multimedia_api.tbz2 -C / && rm jetson_multimedia_api.tbz2

wget -q https://github.com/madsciencetist/jetson-ffmpeg/archive/refs/heads/master.zip
unzip master.zip && rm master.zip && cd jetson-ffmpeg-master

# tegra multimedia libs aren't available in image, so use stubs for ffmpeg build.
# The `${VAR:+...}` expansion keeps this safe under `set -u` when the base image
# does not define LD_LIBRARY_PATH (and avoids a trailing ':'), and the export
# makes sure the stubs path reaches the child build processes in that case too.
export LD_LIBRARY_PATH="$(pwd)/stubs${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

mkdir build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX="$INSTALL_PREFIX"
make -j"$(nproc)"
make install
# Back to /tmp; jetson-ffmpeg-master is kept because the ffmpeg patch below lives in it.
cd ../../

# Install nv-codec-headers to enable ffnvcodec filters (scale_cuda)
wget -q https://github.com/FFmpeg/nv-codec-headers/archive/refs/heads/master.zip
unzip master.zip && rm master.zip && cd nv-codec-headers-master
make PREFIX="$INSTALL_PREFIX" install
cd ../ && rm -rf nv-codec-headers-master

# Build ffmpeg with nvmpi patch
wget -q https://ffmpeg.org/releases/ffmpeg-6.0.tar.xz
tar xaf ffmpeg-*.tar.xz && rm ffmpeg-*.tar.xz && cd ffmpeg-*
patch -p1 < ../jetson-ffmpeg-master/ffmpeg_patches/ffmpeg6.0_nvmpi.patch
# Let configure find the libraries installed into the prefix above.
export PKG_CONFIG_PATH="$INSTALL_PREFIX/lib/pkgconfig"
# enable Jetson codecs but disable dGPU codecs
# On configure failure, dump its log before failing so the cause shows up in the build output.
./configure --cc='ccache gcc' --cxx='ccache g++' \
    --enable-shared --disable-static --prefix="$INSTALL_PREFIX" \
    --enable-gpl --enable-libx264 --enable-libx265 \
    --enable-nvmpi --enable-ffnvcodec --enable-cuda-llvm \
    --disable-cuvid --disable-nvenc --disable-nvdec \
    || { cat ffbuild/config.log && false; }
make -j"$(nproc)"
make install
cd ../

rm -rf /var/lib/apt/lists/*
popd

View File

@ -0,0 +1,28 @@
#!/bin/bash

# Build the TensorRT python bindings as a wheel and place it in /trt-wheels.
#
# Environment:
#   TARGETARCH   - target architecture; the build only runs for "arm64"
#   TENSORRT_VER - branch/tag of the NVIDIA/TensorRT repository to check out
#
# On other architectures the script only creates an empty /trt-wheels.

set -euxo pipefail

mkdir -p /trt-wheels

if [[ "${TARGETARCH}" == "arm64" ]]; then

    # NVIDIA supplies python-tensorrt for python3.8, but frigate uses python3.9,
    # so we must build python-tensorrt ourselves.

    # Get python-tensorrt source
    # -p: do not abort under `set -e` if the base image already provides /workspace
    mkdir -p /workspace
    cd /workspace
    git clone -b "${TENSORRT_VER}" https://github.com/NVIDIA/TensorRT.git --depth=1

    # Collect dependencies
    # The pybind11 and python3.9 headers are symlinked into EXT_PATH, which is
    # handed to build.sh below.
    EXT_PATH=/workspace/external && mkdir -p "$EXT_PATH"
    pip3 install pybind11 && ln -s /usr/local/lib/python3.9/dist-packages/pybind11 "$EXT_PATH/pybind11"
    ln -s /usr/include/python3.9 "$EXT_PATH/python3.9"
    ln -s /usr/include/aarch64-linux-gnu/NvOnnxParser.h /workspace/TensorRT/parsers/onnx/

    # Build wheel
    cd /workspace/TensorRT/python
    EXT_PATH="$EXT_PATH" PYTHON_MAJOR_VERSION=3 PYTHON_MINOR_VERSION=9 TARGET_ARCHITECTURE=aarch64 /bin/bash ./build.sh
    mv build/dist/*.whl /trt-wheels/

fi

View File

@ -2,15 +2,21 @@
# shellcheck shell=bash # shellcheck shell=bash
# Generate models for the TensorRT detector # Generate models for the TensorRT detector
# One or more comma-separated models may be specified via the YOLO_MODELS env.
# Append "-dla" to the model name to generate a DLA model with GPU fallback;
# otherwise a GPU-only model will be generated.
set -o errexit -o nounset -o pipefail set -o errexit -o nounset -o pipefail
MODEL_CACHE_DIR=${MODEL_CACHE_DIR:-"/config/model_cache/tensorrt"} MODEL_CACHE_DIR=${MODEL_CACHE_DIR:-"/config/model_cache/tensorrt"}
TRT_VER=${TRT_VER:-$(cat /etc/TENSORRT_VER)}
OUTPUT_FOLDER="${MODEL_CACHE_DIR}/${TRT_VER}" OUTPUT_FOLDER="${MODEL_CACHE_DIR}/${TRT_VER}"
# Create output folder # Create output folder
mkdir -p ${OUTPUT_FOLDER} mkdir -p ${OUTPUT_FOLDER}
FIRST_MODEL=true FIRST_MODEL=true
MODEL_DOWNLOAD=""
MODEL_CONVERT="" MODEL_CONVERT=""
for model in ${YOLO_MODELS//,/ } for model in ${YOLO_MODELS//,/ }
@ -20,9 +26,11 @@ do
if [[ ! -f ${OUTPUT_FOLDER}/${model}.trt ]]; then if [[ ! -f ${OUTPUT_FOLDER}/${model}.trt ]]; then
if [[ ${FIRST_MODEL} = true ]]; then if [[ ${FIRST_MODEL} = true ]]; then
MODEL_DOWNLOAD="${model%-dla}";
MODEL_CONVERT="${model}" MODEL_CONVERT="${model}"
FIRST_MODEL=false; FIRST_MODEL=false;
else else
MODEL_DOWNLOAD+=",${model%-dla}";
MODEL_CONVERT+=",${model}"; MODEL_CONVERT+=",${model}";
fi fi
else else
@ -35,19 +43,49 @@ if [[ -z ${MODEL_CONVERT} ]]; then
exit 0 exit 0
fi fi
# On Jetpack 4.6, the nvidia container runtime will mount several host nvidia libraries into the
# container which should not be present in the image - if they are, TRT model generation will
# fail or produce invalid models. Thus we must request the user to install them on the host in
# order to run libyolo here.
# On Jetpack 5.0, these libraries are not mounted by the runtime and are supplied by the image.
if [[ "$(arch)" == "aarch64" ]]; then
if [[ ! -e /usr/lib/aarch64-linux-gnu/tegra ]]; then
echo "ERROR: Container must be launched with nvidia runtime"
exit 1
elif [[ ! -e /usr/lib/aarch64-linux-gnu/libnvinfer.so.8 ||
! -e /usr/lib/aarch64-linux-gnu/libnvinfer_plugin.so.8 ||
! -e /usr/lib/aarch64-linux-gnu/libnvparsers.so.8 ||
! -e /usr/lib/aarch64-linux-gnu/libnvonnxparser.so.8 ]]; then
echo "ERROR: Please run the following on the HOST:"
echo " sudo apt install libnvinfer8 libnvinfer-plugin8 libnvparsers8 libnvonnxparsers8 nvidia-container"
exit 1
fi
fi
echo "Generating the following TRT Models: ${MODEL_CONVERT}" echo "Generating the following TRT Models: ${MODEL_CONVERT}"
# Build trt engine # Build trt engine
cd /usr/local/src/tensorrt_demos/yolo cd /usr/local/src/tensorrt_demos/yolo
# Download yolo weights echo "Downloading yolo weights"
./download_yolo.sh $MODEL_CONVERT > /dev/null ./download_yolo.sh $MODEL_DOWNLOAD 2> /dev/null
for model in ${MODEL_CONVERT//,/ } for model in ${MODEL_CONVERT//,/ }
do do
echo "Converting ${model} model" python3 yolo_to_onnx.py -m ${model%-dla} > /dev/null
python3 yolo_to_onnx.py -m ${model} > /dev/null
python3 onnx_to_tensorrt.py -m ${model} > /dev/null echo -e "\nGenerating ${model}.trt. This may take a few minutes.\n"; start=$(date +%s)
cp ${model}.trt ${OUTPUT_FOLDER}/${model}.trt if [[ $model == *-dla ]]; then
cmd="python3 onnx_to_tensorrt.py -m ${model%-dla} --dla_core 0"
else
cmd="python3 onnx_to_tensorrt.py -m ${model}"
fi
$cmd > /tmp/onnx_to_tensorrt.log || { cat /tmp/onnx_to_tensorrt.log && continue; }
mv ${model%-dla}.trt ${OUTPUT_FOLDER}/${model}.trt;
ln -s ${OUTPUT_FOLDER}/${model}.trt ${MODEL_CACHE_DIR}/${model}.trt ln -s ${OUTPUT_FOLDER}/${model}.trt ${MODEL_CACHE_DIR}/${model}.trt
echo "Generated ${model}.trt in $(($(date +%s)-start)) seconds"
done done
echo "Available tensorrt models:"
cd ${OUTPUT_FOLDER} && ls *.trt;

View File

@ -8,7 +8,10 @@ SCRIPT_DIR="/usr/local/src/tensorrt_demos"
git clone --depth 1 https://github.com/NateMeyer/tensorrt_demos.git -b conditional_download git clone --depth 1 https://github.com/NateMeyer/tensorrt_demos.git -b conditional_download
# Build libyolo # Build libyolo
cd ./tensorrt_demos/plugins && make all if [ ! -e /usr/local/cuda ]; then
ln -s /usr/local/cuda-* /usr/local/cuda
fi
cd ./tensorrt_demos/plugins && make all -j$(nproc)
cp libyolo_layer.so /usr/local/lib/libyolo_layer.so cp libyolo_layer.so /usr/local/lib/libyolo_layer.so
# Store yolo scripts for later conversion # Store yolo scripts for later conversion

View File

@ -0,0 +1 @@
cuda-python == 11.7; platform_machine == 'aarch64'

View File

@ -0,0 +1,3 @@
onnx == 1.9.0; platform_machine == 'aarch64'
protobuf == 3.20.3; platform_machine == 'aarch64'
numpy == 1.23.*; platform_machine == 'aarch64'

View File

@ -1,19 +1,47 @@
variable "ARCH" {
default = "amd64"
}
variable "BASE_IMAGE" {
default = null
}
variable "SLIM_BASE" {
default = null
}
variable "TRT_BASE" {
default = null
}
target "_build_args" {
args = {
BASE_IMAGE = BASE_IMAGE,
SLIM_BASE = SLIM_BASE,
TRT_BASE = TRT_BASE
}
platforms = ["linux/${ARCH}"]
}
target wget {
dockerfile = "docker/main/Dockerfile"
target = "wget"
inherits = ["_build_args"]
}
target deps { target deps {
dockerfile = "docker/main/Dockerfile" dockerfile = "docker/main/Dockerfile"
platforms = ["linux/amd64"]
target = "deps" target = "deps"
inherits = ["_build_args"]
} }
target rootfs { target rootfs {
dockerfile = "docker/main/Dockerfile" dockerfile = "docker/main/Dockerfile"
platforms = ["linux/amd64"]
target = "rootfs" target = "rootfs"
inherits = ["_build_args"]
} }
target wheels { target wheels {
dockerfile = "docker/main/Dockerfile" dockerfile = "docker/main/Dockerfile"
platforms = ["linux/amd64"]
target = "wheels" target = "wheels"
inherits = ["_build_args"]
} }
target devcontainer { target devcontainer {
@ -22,25 +50,43 @@ target devcontainer {
target = "devcontainer" target = "devcontainer"
} }
target tensorrt { target "trt-deps" {
dockerfile = "docker/tensorrt/Dockerfile" dockerfile = "docker/tensorrt/Dockerfile.base"
context = "." context = "."
contexts = { contexts = {
deps = "target:deps", deps = "target:deps",
}
inherits = ["_build_args"]
}
target "tensorrt-base" {
dockerfile = "docker/tensorrt/Dockerfile.base"
context = "."
contexts = {
deps = "target:deps",
}
inherits = ["_build_args"]
}
target "tensorrt" {
dockerfile = "docker/tensorrt/Dockerfile.${ARCH}"
context = "."
contexts = {
wget = "target:wget",
tensorrt-base = "target:tensorrt-base",
rootfs = "target:rootfs" rootfs = "target:rootfs"
wheels = "target:wheels" wheels = "target:wheels"
} }
platforms = ["linux/amd64"]
target = "frigate-tensorrt" target = "frigate-tensorrt"
inherits = ["_build_args"]
} }
target devcontainer-trt { target "devcontainer-trt" {
dockerfile = "docker/tensorrt/Dockerfile" dockerfile = "docker/tensorrt/Dockerfile.amd64"
context = "." context = "."
contexts = { contexts = {
deps = "target:deps", wheels = "target:wheels",
rootfs = "target:rootfs" trt-deps = "target:trt-deps",
wheels = "target:wheels"
devcontainer = "target:devcontainer" devcontainer = "target:devcontainer"
} }
platforms = ["linux/amd64"] platforms = ["linux/amd64"]

View File

@ -1,10 +1,26 @@
BOARDS += trt BOARDS += trt
JETPACK4_BASE ?= timongentzsch/l4t-ubuntu20-opencv:latest # L4T 32.7.1 JetPack 4.6.1
JETPACK5_BASE ?= nvcr.io/nvidia/l4t-tensorrt:r8.5.2-runtime # L4T 35.3.1 JetPack 5.1.1
X86_DGPU_ARGS := ARCH=amd64
JETPACK4_ARGS := ARCH=arm64 BASE_IMAGE=$(JETPACK4_BASE) SLIM_BASE=$(JETPACK4_BASE) TRT_BASE=$(JETPACK4_BASE)
JETPACK5_ARGS := ARCH=arm64 BASE_IMAGE=$(JETPACK5_BASE) SLIM_BASE=$(JETPACK5_BASE) TRT_BASE=$(JETPACK5_BASE)
local-trt: version local-trt: version
docker buildx bake --load --file=docker/tensorrt/trt.hcl --set tensorrt.tags=frigate:latest-tensorrt tensorrt $(X86_DGPU_ARGS) docker buildx bake --load --file=docker/tensorrt/trt.hcl --set tensorrt.tags=frigate:latest-tensorrt tensorrt
local-trt-jp4: version
$(JETPACK4_ARGS) docker buildx bake --load --file=docker/tensorrt/trt.hcl --set tensorrt.tags=frigate:latest-tensorrt-jp4 tensorrt
local-trt-jp5: version
$(JETPACK5_ARGS) docker buildx bake --load --file=docker/tensorrt/trt.hcl --set tensorrt.tags=frigate:latest-tensorrt-jp5 tensorrt
build-trt: build-trt:
docker buildx bake --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt tensorrt $(X86_DGPU_ARGS) docker buildx bake --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt tensorrt
$(JETPACK4_ARGS) docker buildx bake --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt-jp4 tensorrt
$(JETPACK5_ARGS) docker buildx bake --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt-jp5 tensorrt
push-trt: build-trt push-trt: build-trt
docker buildx bake --push --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt tensorrt $(X86_DGPU_ARGS) docker buildx bake --push --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt tensorrt
$(JETPACK4_ARGS) docker buildx bake --push --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt-jp4 tensorrt
$(JETPACK5_ARGS) docker buildx bake --push --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt-jp5 tensorrt

View File

@ -11,16 +11,18 @@ It is highly recommended to use hwaccel presets in the config. These presets not
See [the hwaccel docs](/configuration/hardware_acceleration.md) for more info on how to setup hwaccel for your GPU / iGPU. See [the hwaccel docs](/configuration/hardware_acceleration.md) for more info on how to setup hwaccel for your GPU / iGPU.
| Preset | Usage | Other Notes | | Preset | Usage | Other Notes |
| --------------------- | ---------------------------- | ----------------------------------------------------- | | --------------------- | ------------------------------ | ----------------------------------------------------- |
| preset-rpi-32-h264 | 32 bit Rpi with h264 stream | | | preset-rpi-32-h264 | 32 bit Rpi with h264 stream | |
| preset-rpi-64-h264 | 64 bit Rpi with h264 stream | | | preset-rpi-64-h264 | 64 bit Rpi with h264 stream | |
| preset-vaapi | Intel & AMD VAAPI | Check hwaccel docs to ensure correct driver is chosen | | preset-vaapi | Intel & AMD VAAPI | Check hwaccel docs to ensure correct driver is chosen |
| preset-intel-qsv-h264 | Intel QSV with h264 stream | If issues occur recommend using vaapi preset instead | | preset-intel-qsv-h264 | Intel QSV with h264 stream | If issues occur recommend using vaapi preset instead |
| preset-intel-qsv-h265 | Intel QSV with h265 stream | If issues occur recommend using vaapi preset instead | | preset-intel-qsv-h265 | Intel QSV with h265 stream | If issues occur recommend using vaapi preset instead |
| preset-nvidia-h264 | Nvidia GPU with h264 stream | | | preset-nvidia-h264 | Nvidia GPU with h264 stream | |
| preset-nvidia-h265 | Nvidia GPU with h265 stream | | | preset-nvidia-h265 | Nvidia GPU with h265 stream | |
| preset-nvidia-mjpeg | Nvidia GPU with mjpeg stream | Recommend restreaming mjpeg and using nvidia-h264 | | preset-nvidia-mjpeg | Nvidia GPU with mjpeg stream | Recommend restreaming mjpeg and using nvidia-h264 |
| preset-jetson-h264 | Nvidia Jetson with h264 stream | |
| preset-jetson-h265 | Nvidia Jetson with h265 stream | |
### Input Args Presets ### Input Args Presets

View File

@ -246,3 +246,77 @@ If you do not see these processes, check the `docker logs` for the container and
These instructions were originally based on the [Jellyfin documentation](https://jellyfin.org/docs/general/administration/hardware-acceleration.html#nvidia-hardware-acceleration-on-docker-linux). These instructions were originally based on the [Jellyfin documentation](https://jellyfin.org/docs/general/administration/hardware-acceleration.html#nvidia-hardware-acceleration-on-docker-linux).
# Community Supported # Community Supported
## NVIDIA Jetson (Orin AGX, Orin NX, Orin Nano*, Xavier AGX, Xavier NX, TX2, TX1, Nano)
A separate set of docker images is available that is based on Jetpack/L4T. They come with an `ffmpeg` build
with codecs that use the Jetson's dedicated media engine. If your Jetson host is running Jetpack 4.6, use the
`frigate-tensorrt-jp4` image, or if your Jetson host is running Jetpack 5.0+, use the `frigate-tensorrt-jp5`
image. Note that the Orin Nano has no video encoder, so frigate will use software encoding on this platform,
but the image will still allow hardware decoding and tensorrt object detection.
You will need to use the image with the nvidia container runtime:
### Docker Run CLI - Jetson
```bash
docker run -d \
...
--runtime nvidia \
ghcr.io/blakeblackshear/frigate-tensorrt-jp5
```
### Docker Compose - Jetson
```yaml
version: '2.4'
services:
frigate:
...
image: ghcr.io/blakeblackshear/frigate-tensorrt-jp5
runtime: nvidia # Add this
```
:::note
The `runtime:` tag is not supported on older versions of docker-compose. If you run into this, you can instead use the nvidia runtime system-wide by adding `"default-runtime": "nvidia"` to `/etc/docker/daemon.json`:
```
{
"runtimes": {
"nvidia": {
"path": "nvidia-container-runtime",
"runtimeArgs": []
}
},
"default-runtime": "nvidia"
}
```
:::
### Setup Decoder
The decoder you need to pass in the `hwaccel_args` will depend on the input video.
A list of supported codecs (you can use `ffmpeg -decoders | grep nvmpi` in the container to get the ones your device supports):
```
V..... h264_nvmpi h264 (nvmpi) (codec h264)
V..... hevc_nvmpi hevc (nvmpi) (codec hevc)
V..... mpeg2_nvmpi mpeg2 (nvmpi) (codec mpeg2video)
V..... mpeg4_nvmpi mpeg4 (nvmpi) (codec mpeg4)
V..... vp8_nvmpi vp8 (nvmpi) (codec vp8)
V..... vp9_nvmpi vp9 (nvmpi) (codec vp9)
```
For example, for H264 video, you'll select `preset-jetson-h264`.
```yaml
ffmpeg:
hwaccel_args: preset-jetson-h264
```
If everything is working correctly, you should see a significant reduction in ffmpeg CPU load and power consumption.
Verify that hardware decoding is working by running `jtop` (`sudo pip3 install -U jetson-stats`), which should show
that NVDEC/NVDEC1 are in use.

View File

@ -101,7 +101,7 @@ detectors:
# Required: name of the detector # Required: name of the detector
detector_name: detector_name:
# Required: type of the detector # Required: type of the detector
# Frigate provided types include 'cpu', 'edgetpu', and 'openvino' (default: shown below) # Frigate provided types include 'cpu', 'edgetpu', 'openvino' and 'tensorrt' (default: shown below)
# Additional detector types can also be plugged in. # Additional detector types can also be plugged in.
# Detectors may require additional configuration. # Detectors may require additional configuration.
# Refer to the Detectors configuration page for more information. # Refer to the Detectors configuration page for more information.
@ -414,6 +414,8 @@ snapshots:
# Optional: Per object retention days # Optional: Per object retention days
objects: objects:
person: 15 person: 15
# Optional: quality of the encoded jpeg, 0-100 (default: shown below)
quality: 70
# Optional: RTMP configuration # Optional: RTMP configuration
# NOTE: RTMP is deprecated in favor of restream # NOTE: RTMP is deprecated in favor of restream

View File

@ -196,7 +196,9 @@ The model used for TensorRT must be preprocessed on the same hardware platform t
The Frigate image will generate model files during startup if the specified model is not found. Processed models are stored in the `/config/model_cache` folder. Typically the `/config` path is mapped to a directory on the host already and the `model_cache` does not need to be mapped separately unless the user wants to store it in a different location on the host. The Frigate image will generate model files during startup if the specified model is not found. Processed models are stored in the `/config/model_cache` folder. Typically the `/config` path is mapped to a directory on the host already and the `model_cache` does not need to be mapped separately unless the user wants to store it in a different location on the host.
To by default, the `yolov7-tiny-416` model will be generated, but this can be overridden by specifying the `YOLO_MODELS` environment variable in Docker. One or more models may be listed in a comma-separated format, and each one will be generated. To select no model generation, set the variable to an empty string, `YOLO_MODELS=""`. Models will only be generated if the corresponding `{model}.trt` file is not present in the `model_cache` folder, so you can force a model to be regenerated by deleting it from your Frigate data folder. By default, the `yolov7-320` model will be generated, but this can be overridden by specifying the `YOLO_MODELS` environment variable in Docker. One or more models may be listed in a comma-separated format, and each one will be generated. To select no model generation, set the variable to an empty string, `YOLO_MODELS=""`. Models will only be generated if the corresponding `{model}.trt` file is not present in the `model_cache` folder, so you can force a model to be regenerated by deleting it from your Frigate data folder.
If you have a Jetson device with DLAs (Xavier or Orin), you can generate a model that will run on the DLA by appending `-dla` to your model name, e.g. specify `YOLO_MODELS=yolov7-320-dla`. The model will run on DLA0 (Frigate does not currently support DLA1). DLA-incompatible layers will fall back to running on the GPU.
If your GPU does not support FP16 operations, you can pass the environment variable `USE_FP16=False` to disable it. If your GPU does not support FP16 operations, you can pass the environment variable `USE_FP16=False` to disable it.
@ -252,11 +254,11 @@ detectors:
device: 0 #This is the default, select the first GPU device: 0 #This is the default, select the first GPU
model: model:
path: /config/model_cache/tensorrt/yolov7-tiny-416.trt path: /config/model_cache/tensorrt/yolov7-320.trt
input_tensor: nchw input_tensor: nchw
input_pixel_format: rgb input_pixel_format: rgb
width: 416 width: 320
height: 416 height: 320
``` ```
## Deepstack / CodeProject.AI Server Detector ## Deepstack / CodeProject.AI Server Detector

View File

@ -101,12 +101,18 @@ This should show <50% CPU in top, and ~80% CPU without `-c:v h264_v4l2m2m`.
ffmpeg -c:v h264_v4l2m2m -re -stream_loop -1 -i https://streams.videolan.org/ffmpeg/incoming/720p60.mp4 -f rawvideo -pix_fmt yuv420p pipe: > /dev/null ffmpeg -c:v h264_v4l2m2m -re -stream_loop -1 -i https://streams.videolan.org/ffmpeg/incoming/720p60.mp4 -f rawvideo -pix_fmt yuv420p pipe: > /dev/null
``` ```
**NVIDIA** **NVIDIA GPU**
```shell ```shell
ffmpeg -c:v h264_cuvid -re -stream_loop -1 -i https://streams.videolan.org/ffmpeg/incoming/720p60.mp4 -f rawvideo -pix_fmt yuv420p pipe: > /dev/null ffmpeg -c:v h264_cuvid -re -stream_loop -1 -i https://streams.videolan.org/ffmpeg/incoming/720p60.mp4 -f rawvideo -pix_fmt yuv420p pipe: > /dev/null
``` ```
**NVIDIA Jetson**
```shell
ffmpeg -c:v h264_nvmpi -re -stream_loop -1 -i https://streams.videolan.org/ffmpeg/incoming/720p60.mp4 -f rawvideo -pix_fmt yuv420p pipe: > /dev/null
```
**VAAPI** **VAAPI**
```shell ```shell

View File

@ -70,7 +70,7 @@ Inference speeds vary greatly depending on the CPU, GPU, or VPU used, some known
| Intel i5 1135G7 | 10 - 15 ms | | | Intel i5 1135G7 | 10 - 15 ms | |
| Intel i5 12600K | ~ 15 ms | Inference speeds on CPU were ~ 35 ms | | Intel i5 12600K | ~ 15 ms | Inference speeds on CPU were ~ 35 ms |
### TensorRT ### TensorRT - Nvidia GPU
The TensorRT detector is able to run on x86 hosts that have an Nvidia GPU which supports the 12.x series of CUDA libraries. The minimum driver version on the host system must be `>=525.60.13`. Also the GPU must support a Compute Capability of `5.0` or greater. This generally correlates to a Maxwell-era GPU or newer, check the [TensorRT docs for more info](/configuration/object_detectors#nvidia-tensorrt-detector). The TensorRT detector is able to run on x86 hosts that have an Nvidia GPU which supports the 12.x series of CUDA libraries. The minimum driver version on the host system must be `>=525.60.13`. Also the GPU must support a Compute Capability of `5.0` or greater. This generally correlates to a Maxwell-era GPU or newer, check the [TensorRT docs for more info](/configuration/object_detectors#nvidia-tensorrt-detector).
@ -87,6 +87,14 @@ Inference speeds will vary greatly depending on the GPU and the model used.
| Quadro P400 2GB | 20 - 25 ms | | Quadro P400 2GB | 20 - 25 ms |
| Quadro P2000 | ~ 12 ms | | Quadro P2000 | ~ 12 ms |
### Community Supported:
#### Nvidia Jetson
Frigate supports all Jetson boards, from the inexpensive Jetson Nano to the powerful Jetson Orin AGX. It will [make use of the Jetson's hardware media engine](/configuration/hardware_acceleration#nvidia-jetson-orin-agx-orin-nx-orin-nano-xavier-agx-xavier-nx-tx2-tx1-nano) when configured with the [appropriate presets](/configuration/ffmpeg_presets#hwaccel-presets), and will make use of the Jetson's GPU and DLA for object detection when configured with the [TensorRT detector](/configuration/object_detectors#nvidia-tensorrt-detector).
Inference speed will vary depending on the YOLO model, Jetson platform and Jetson nvpmodel (GPU/DLA/EMC clock speed). It is typically 20-40 ms for most models. The DLA is more efficient than the GPU, but not faster, so using the DLA will reduce power consumption but will slightly increase inference time.
## What does Frigate use the CPU for and what does it use a detector for? (ELI5 Version) ## What does Frigate use the CPU for and what does it use a detector for? (ELI5 Version)
This is taken from a [user question on reddit](https://www.reddit.com/r/homeassistant/comments/q8mgau/comment/hgqbxh5/?utm_source=share&utm_medium=web2x&context=3). Modified slightly for clarity. This is taken from a [user question on reddit](https://www.reddit.com/r/homeassistant/comments/q8mgau/comment/hgqbxh5/?utm_source=share&utm_medium=web2x&context=3). Modified slightly for clarity.

View File

@ -93,6 +93,9 @@ The following officially supported builds are available:
The following community supported builds are available: The following community supported builds are available:
`ghcr.io/blakeblackshear/frigate:stable-tensorrt-jp5` - Frigate build optimized for nvidia Jetson devices running Jetpack 5
`ghcr.io/blakeblackshear/frigate:stable-tensorrt-jp4` - Frigate build optimized for nvidia Jetson devices running Jetpack 4.6
::: :::
```yaml ```yaml

View File

@ -804,9 +804,19 @@ class CameraConfig(FrigateBaseModel):
ffmpeg_input.global_args or self.ffmpeg.global_args ffmpeg_input.global_args or self.ffmpeg.global_args
) )
hwaccel_args = get_ffmpeg_arg_list( hwaccel_args = get_ffmpeg_arg_list(
parse_preset_hardware_acceleration_decode(ffmpeg_input.hwaccel_args) parse_preset_hardware_acceleration_decode(
ffmpeg_input.hwaccel_args,
self.detect.fps,
self.detect.width,
self.detect.height,
)
or ffmpeg_input.hwaccel_args or ffmpeg_input.hwaccel_args
or parse_preset_hardware_acceleration_decode(self.ffmpeg.hwaccel_args) or parse_preset_hardware_acceleration_decode(
self.ffmpeg.hwaccel_args,
self.detect.fps,
self.detect.width,
self.detect.height,
)
or self.ffmpeg.hwaccel_args or self.ffmpeg.hwaccel_args
) )
input_args = get_ffmpeg_arg_list( input_args = get_ffmpeg_arg_list(

View File

@ -55,58 +55,16 @@ _user_agent_args = [
] ]
PRESETS_HW_ACCEL_DECODE = { PRESETS_HW_ACCEL_DECODE = {
"preset-rpi-32-h264": ["-c:v:1", "h264_v4l2m2m"], "preset-rpi-32-h264": "-c:v:1 h264_v4l2m2m",
"preset-rpi-64-h264": ["-c:v:1", "h264_v4l2m2m"], "preset-rpi-64-h264": "-c:v:1 h264_v4l2m2m",
"preset-vaapi": [ "preset-vaapi": f"-hwaccel_flags allow_profile_mismatch -hwaccel vaapi -hwaccel_devic {_gpu_selector.get_selected_gpu()} -hwaccel_output_format vaapi",
"-hwaccel_flags", "preset-intel-qsv-h264": f"-hwaccel qsv -qsv_devic {_gpu_selector.get_selected_gpu()} -hwaccel_output_format qsv -c:v h264_qsv",
"allow_profile_mismatch", "preset-intel-qsv-h265": f"-load_plugin hevc_hw -hwaccel qsv -qsv_device {_gpu_selector.get_selected_gpu()} -hwaccel_output_format qsv -c:v hevc_qsv",
"-hwaccel", "preset-nvidia-h264": "-hwaccel cuda -hwaccel_output_format cuda",
"vaapi", "preset-nvidia-h265": "-hwaccel cuda -hwaccel_output_format cuda",
"-hwaccel_device", "preset-nvidia-mjpeg": "-hwaccel cuda -hwaccel_output_format cuda",
_gpu_selector.get_selected_gpu(), "preset-jetson-h264": "-c:v h264_nvmpi -resize {1}x{2}",
"-hwaccel_output_format", "preset-jetson-h265": "-c:v hevc_nvmpi -resize {1}x{2}",
"vaapi",
],
"preset-intel-qsv-h264": [
"-hwaccel",
"qsv",
"-qsv_device",
_gpu_selector.get_selected_gpu(),
"-hwaccel_output_format",
"qsv",
"-c:v",
"h264_qsv",
],
"preset-intel-qsv-h265": [
"-load_plugin",
"hevc_hw",
"-hwaccel",
"qsv",
"-qsv_device",
_gpu_selector.get_selected_gpu(),
"-hwaccel_output_format",
"qsv",
"-c:v",
"hevc_qsv",
],
"preset-nvidia-h264": [
"-hwaccel",
"cuda",
"-hwaccel_output_format",
"cuda",
],
"preset-nvidia-h265": [
"-hwaccel",
"cuda",
"-hwaccel_output_format",
"cuda",
],
"preset-nvidia-mjpeg": [
"-hwaccel",
"cuda",
"-hwaccel_output_format",
"cuda",
],
} }
PRESETS_HW_ACCEL_SCALE = { PRESETS_HW_ACCEL_SCALE = {
@ -117,6 +75,8 @@ PRESETS_HW_ACCEL_SCALE = {
"preset-intel-qsv-h265": "-r {0} -vf vpp_qsv=framerate={0}:w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p", "preset-intel-qsv-h265": "-r {0} -vf vpp_qsv=framerate={0}:w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p",
"preset-nvidia-h264": "-r {0} -vf fps={0},scale_cuda=w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p", "preset-nvidia-h264": "-r {0} -vf fps={0},scale_cuda=w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p",
"preset-nvidia-h265": "-r {0} -vf fps={0},scale_cuda=w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p", "preset-nvidia-h265": "-r {0} -vf fps={0},scale_cuda=w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p",
"preset-jetson-h264": "-r {0}", # scaled in decoder
"preset-jetson-h265": "-r {0}", # scaled in decoder
"default": "-r {0} -vf fps={0},scale={1}:{2}", "default": "-r {0} -vf fps={0},scale={1}:{2}",
} }
@ -128,6 +88,8 @@ PRESETS_HW_ACCEL_ENCODE_BIRDSEYE = {
"preset-intel-qsv-h265": "ffmpeg -hide_banner {0} -c:v h264_qsv -g 50 -bf 0 -profile:v high -level:v 4.1 -async_depth:v 1 {1}", "preset-intel-qsv-h265": "ffmpeg -hide_banner {0} -c:v h264_qsv -g 50 -bf 0 -profile:v high -level:v 4.1 -async_depth:v 1 {1}",
"preset-nvidia-h264": "ffmpeg -hide_banner {0} -c:v h264_nvenc -g 50 -profile:v high -level:v auto -preset:v p2 -tune:v ll {1}", "preset-nvidia-h264": "ffmpeg -hide_banner {0} -c:v h264_nvenc -g 50 -profile:v high -level:v auto -preset:v p2 -tune:v ll {1}",
"preset-nvidia-h265": "ffmpeg -hide_banner {0} -c:v h264_nvenc -g 50 -profile:v high -level:v auto -preset:v p2 -tune:v ll {1}", "preset-nvidia-h265": "ffmpeg -hide_banner {0} -c:v h264_nvenc -g 50 -profile:v high -level:v auto -preset:v p2 -tune:v ll {1}",
"preset-jetson-h264": "ffmpeg -hide_banner {0} -c:v h264_nvmpi -profile high {1}",
"preset-jetson-h265": "ffmpeg -hide_banner {0} -c:v h264_nvmpi -profile high {1}",
"default": "ffmpeg -hide_banner {0} -c:v libx264 -g 50 -profile:v high -level:v 4.1 -preset:v superfast -tune:v zerolatency {1}", "default": "ffmpeg -hide_banner {0} -c:v libx264 -g 50 -profile:v high -level:v 4.1 -preset:v superfast -tune:v zerolatency {1}",
} }
@ -139,16 +101,28 @@ PRESETS_HW_ACCEL_ENCODE_TIMELAPSE = {
"preset-intel-qsv-h265": "ffmpeg -hide_banner {0} -c:v hevc_qsv -g 50 -bf 0 -profile:v high -level:v 4.1 -async_depth:v 1 {1}", "preset-intel-qsv-h265": "ffmpeg -hide_banner {0} -c:v hevc_qsv -g 50 -bf 0 -profile:v high -level:v 4.1 -async_depth:v 1 {1}",
"preset-nvidia-h264": "ffmpeg -hide_banner -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 8 {0} -c:v h264_nvenc {1}", "preset-nvidia-h264": "ffmpeg -hide_banner -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 8 {0} -c:v h264_nvenc {1}",
"preset-nvidia-h265": "ffmpeg -hide_banner -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 8 {0} -c:v hevc_nvenc {1}", "preset-nvidia-h265": "ffmpeg -hide_banner -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 8 {0} -c:v hevc_nvenc {1}",
"preset-jetson-h264": "ffmpeg -hide_banner {0} -c:v h264_nvmpi -profile high {1}",
"preset-jetson-h265": "ffmpeg -hide_banner {0} -c:v hevc_nvmpi -profile high {1}",
"default": "ffmpeg -hide_banner {0} -c:v libx264 -preset:v ultrafast -tune:v zerolatency {1}", "default": "ffmpeg -hide_banner {0} -c:v libx264 -preset:v ultrafast -tune:v zerolatency {1}",
} }
def parse_preset_hardware_acceleration_decode(arg: Any) -> list[str]: def parse_preset_hardware_acceleration_decode(
arg: Any,
fps: int,
width: int,
height: int,
) -> list[str]:
"""Return the correct preset if in preset format otherwise return None.""" """Return the correct preset if in preset format otherwise return None."""
if not isinstance(arg, str): if not isinstance(arg, str):
return None return None
return PRESETS_HW_ACCEL_DECODE.get(arg, None) decode = PRESETS_HW_ACCEL_DECODE.get(arg, None)
if not decode:
return None
return decode.format(fps, width, height).split(" ")
def parse_preset_hardware_acceleration_scale( def parse_preset_hardware_acceleration_scale(
@ -160,20 +134,13 @@ def parse_preset_hardware_acceleration_scale(
) -> list[str]: ) -> list[str]:
"""Return the correct scaling preset or default preset if none is set.""" """Return the correct scaling preset or default preset if none is set."""
if not isinstance(arg, str) or " " in arg: if not isinstance(arg, str) or " " in arg:
scale = PRESETS_HW_ACCEL_SCALE["default"].format(fps, width, height).split(" ") scale = PRESETS_HW_ACCEL_SCALE["default"]
scale.extend(detect_args)
return scale
scale = PRESETS_HW_ACCEL_SCALE.get(arg, "")
if scale:
scale = scale.format(fps, width, height).split(" ")
scale.extend(detect_args)
return scale
else: else:
scale = scale.format(fps, width, height).split(" ") scale = PRESETS_HW_ACCEL_SCALE.get(arg, "")
scale.extend(detect_args)
return scale scale = scale.format(fps, width, height).split(" ")
scale.extend(detect_args)
return scale
class EncodeTypeEnum(str, Enum): class EncodeTypeEnum(str, Enum):
@ -193,6 +160,10 @@ def parse_preset_hardware_acceleration_encode(
if not isinstance(arg, str): if not isinstance(arg, str):
return arg_map["default"].format(input, output) return arg_map["default"].format(input, output)
# Not all jetsons have HW encoders, so fall back to default SW encoder if not
if arg.startswith("preset-jetson-") and not os.path.exists("/dev/nvhost-msenc"):
arg = "default"
return arg_map.get(arg, arg_map["default"]).format( return arg_map.get(arg, arg_map["default"]).format(
input, input,
output, output,

View File

@ -22,6 +22,7 @@ from frigate.util.services import (
get_bandwidth_stats, get_bandwidth_stats,
get_cpu_stats, get_cpu_stats,
get_intel_gpu_stats, get_intel_gpu_stats,
get_jetson_stats,
get_nvidia_gpu_stats, get_nvidia_gpu_stats,
) )
from frigate.version import VERSION from frigate.version import VERSION
@ -180,6 +181,15 @@ async def set_gpu_stats(
else: else:
stats["nvidia-gpu"] = {"gpu": -1, "mem": -1} stats["nvidia-gpu"] = {"gpu": -1, "mem": -1}
hwaccel_errors.append(args) hwaccel_errors.append(args)
elif "nvmpi" in args or "jetson" in args:
# nvidia Jetson
jetson_usage = get_jetson_stats()
if jetson_usage:
stats["jetson-gpu"] = jetson_usage
else:
stats["jetson-gpu"] = {"gpu": -1, "mem": -1}
hwaccel_errors.append(args)
elif "qsv" in args: elif "qsv" in args:
if not config.telemetry.stats.intel_gpu_stats: if not config.telemetry.stats.intel_gpu_stats:
continue continue

View File

@ -309,6 +309,21 @@ def get_nvidia_gpu_stats() -> dict[int, dict]:
return results return results
def get_jetson_stats() -> "dict[str, str] | None":
    """Get GPU usage stats for an Nvidia Jetson board.

    The GPU load is read from the ``/sys/devices/gpu.0/load`` sysfs node.

    Returns:
        A dict with two string entries, or ``None`` when the load file
        cannot be opened or its first line is not a number:

        * ``"gpu"`` - the load formatted as a percentage, e.g. ``"12.5%"``.
        * ``"mem"`` - always ``"-"`` (no discrete gpu memory to report).
    """
    try:
        with open("/sys/devices/gpu.0/load", "r") as f:
            # The raw value is divided by 10 to obtain a percentage
            # (presumably the node reports tenths of a percent - confirm
            # against the Jetson kernel documentation).
            gpuload = float(f.readline()) / 10
    except (OSError, ValueError):
        # OSError: node missing or unreadable (e.g. not running on a Jetson).
        # ValueError: empty or non-numeric content.
        return None

    return {
        "mem": "-",  # no discrete gpu memory
        "gpu": f"{gpuload}%",
    }
def ffprobe_stream(path: str) -> sp.CompletedProcess: def ffprobe_stream(path: str) -> sp.CompletedProcess:
"""Run ffprobe on stream.""" """Run ffprobe on stream."""
clean_path = escape_special_characters(path) clean_path = escape_special_characters(path)