Mirror of https://github.com/blakeblackshear/frigate.git, synced 2025-08-13 13:47:36 +02:00

Features added:
- GPU object detection via TensorRT
- CPU object detection switched from TensorFlow Lite to TensorFlow

This commit is contained in:
parent 2bc57d271c
commit 2298ca740c
.dockerignore

@@ -2,5 +2,9 @@ README.md
diagram.png
.gitignore
debug
build
venv*
.idea
config/
docker-compose.*
*.pyc
.gitignore (vendored): 6 lines changed
@@ -1,4 +1,8 @@
*.pyc
debug
build
venv*
.vscode
config/config.yml
.idea
config/config.yml
docker-compose.*
Dockerfile: 32 lines changed (Executable file → Normal file)
@@ -9,22 +9,20 @@ RUN apt -qq update && apt -qq install --no-install-recommends -y \
build-essential \
gnupg wget unzip \
# libcap-dev \
&& add-apt-repository ppa:deadsnakes/ppa -y \
&& apt -qq install --no-install-recommends -y \
python3.7 \
python3.7-dev \
python3-dev \
python3-pip \
ffmpeg \
# VAAPI drivers for Intel hardware accel
libva-drm2 libva2 i965-va-driver vainfo \
&& python3.7 -m pip install -U wheel setuptools \
&& python3.7 -m pip install -U \
&& python3 -m pip install -U wheel pip setuptools \
&& python3 -m pip install -U \
opencv-python-headless \
# python-prctl \
numpy \
imutils \
scipy \
&& python3.7 -m pip install -U \
&& python3 -m pip install -U \
Flask \
paho-mqtt \
PyYAML \

@@ -37,23 +35,27 @@ RUN apt -qq update && apt -qq install --no-install-recommends -y \
&& apt -qq install --no-install-recommends -y \
libedgetpu1-max \
## Tensorflow lite (python 3.7 only)
&& wget -q https://dl.google.com/coral/python/tflite_runtime-2.1.0.post1-cp37-cp37m-linux_x86_64.whl \
&& python3.7 -m pip install tflite_runtime-2.1.0.post1-cp37-cp37m-linux_x86_64.whl \
&& rm tflite_runtime-2.1.0.post1-cp37-cp37m-linux_x86_64.whl \
&& wget -q https://dl.google.com/coral/python/tflite_runtime-2.1.0.post1-cp36-cp36m-linux_x86_64.whl \
&& python3 -m pip install tflite_runtime-2.1.0.post1-cp36-cp36m-linux_x86_64.whl \
&& rm tflite_runtime-2.1.0.post1-cp36-cp36m-linux_x86_64.whl \
&& rm -rf /var/lib/apt/lists/* \
&& (apt-get autoremove -y; apt-get autoclean -y)
&& (apt-get autoremove -y; apt-get autoclean -y) \
## Tensorflow
&& python3 -m pip install tensorflow==1.15.2

# get model and labels
RUN wget -q https://github.com/google-coral/edgetpu/raw/master/test_data/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite -O /edgetpu_model.tflite --trust-server-names
RUN wget -q https://dl.google.com/coral/canned_models/coco_labels.txt -O /labelmap.txt --trust-server-names
RUN wget -q https://storage.googleapis.com/download.tensorflow.org/models/tflite/coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip -O /cpu_model.zip && \
unzip /cpu_model.zip detect.tflite -d / && \
mv /detect.tflite /cpu_model.tflite && \
rm /cpu_model.zip
RUN wget -q http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2018_01_28.tar.gz -O /cpu_model.tar.gz && \
tar -xf /cpu_model.tar.gz -C / ssd_mobilenet_v1_coco_2018_01_28/frozen_inference_graph.pb --strip-components 1 && \
mv /frozen_inference_graph.pb /cpu_model.pb && \
rm /cpu_model.tar.gz

WORKDIR /opt/frigate/
ADD frigate frigate/
COPY detect_objects.py .
COPY benchmark.py .

CMD ["python3.7", "-u", "detect_objects.py"]
ENV TF_CPP_MIN_LOG_LEVEL 2

CMD ["python3", "-u", "detect_objects.py"]
Dockerfile.gpu (new file): 183 lines
@@ -0,0 +1,183 @@
FROM frigate AS base

#
# CUDA 10.2 base
#
# https://gitlab.com/nvidia/container-images/cuda/blob/master/dist/ubuntu18.04/10.2/base/Dockerfile
#
RUN apt-get update && apt-get install -y --no-install-recommends \
gnupg2 curl ca-certificates && \
curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub | apt-key add - && \
echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \
echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list && \
apt-get purge --autoremove -y curl && \
rm -rf /var/lib/apt/lists/*

ENV CUDA_VERSION 10.2.89
LABEL com.nvidia.cuda.version="${CUDA_VERSION}"

ENV CUDA_PKG_VERSION 10-2=$CUDA_VERSION-1

# For libraries in the cuda-compat-* package: https://docs.nvidia.com/cuda/eula/index.html#attachment-a
RUN apt-get update && apt-get install -y --no-install-recommends \
cuda-cudart-$CUDA_PKG_VERSION \
cuda-compat-10-2 && \
ln -s cuda-10.2 /usr/local/cuda && \
rm -rf /var/lib/apt/lists/*

# Required for nvidia-docker v1
RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && \
echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf

ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}
ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64

# nvidia-container-runtime
ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
ENV NVIDIA_REQUIRE_CUDA "cuda>=10.2 brand=tesla,driver>=384,driver<385 brand=tesla,driver>=396,driver<397 brand=tesla,driver>=410,driver<411 brand=tesla,driver>=418,driver<419"

#
# CUDA 10.2 runtime
#
# https://gitlab.com/nvidia/container-images/cuda/blob/master/dist/ubuntu18.04/10.2/runtime/Dockerfile
#
ENV NCCL_VERSION 2.5.6

RUN apt-get update && apt-get install -y --no-install-recommends \
cuda-libraries-$CUDA_PKG_VERSION \
cuda-nvtx-$CUDA_PKG_VERSION \
libcublas10=10.2.2.89-1 \
libnccl2=$NCCL_VERSION-1+cuda10.2 && \
apt-mark hold libnccl2 && \
rm -rf /var/lib/apt/lists/*

#
# cuDNN 7.6.5.32 runtime
#
# https://gitlab.com/nvidia/container-images/cuda/blob/master/dist/ubuntu18.04/10.2/runtime/cudnn7/Dockerfile
#
ENV CUDNN_VERSION 7.6.5.32
LABEL com.nvidia.cudnn.version="${CUDNN_VERSION}"

RUN apt-get update && apt-get install -y --no-install-recommends \
libcudnn7=$CUDNN_VERSION-1+cuda10.2 \
&& \
apt-mark hold libcudnn7 && \
rm -rf /var/lib/apt/lists/*

#
# TensorRT 6.0.1.8
#
# https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/tensorrt-601/tensorrt-install-guide/index.html#maclearn-net-repo-install
#
ENV TENSORRT_VERSION 6.0.1
LABEL com.nvidia.tensorrt.version="${TENSORRT_VERSION}"

RUN version=$TENSORRT_VERSION-1+cuda10.2 && \
apt-get update && apt-get install -y --no-install-recommends \
libnvinfer6=${version} \
libnvonnxparsers6=${version} libnvparsers6=${version} \
libnvinfer-plugin6=${version} \
python3-libnvinfer=${version} \
&& \
apt-mark hold \
libnvinfer6 \
libnvonnxparsers6 libnvparsers6 \
libnvinfer-plugin6 \
python3-libnvinfer \
&& \
rm -rf /var/lib/apt/lists/*

#
# Use a previous stage as a new temporary stage for building libraries
#
FROM base AS builder

#
# CUDA 10.2 devel
#
# https://gitlab.com/nvidia/container-images/cuda/blob/master/dist/ubuntu18.04/10.2/devel/Dockerfile
#
RUN apt-get update && apt-get install -y --no-install-recommends \
cuda-nvml-dev-$CUDA_PKG_VERSION \
cuda-command-line-tools-$CUDA_PKG_VERSION \
cuda-libraries-dev-$CUDA_PKG_VERSION \
cuda-minimal-build-$CUDA_PKG_VERSION \
libnccl-dev=$NCCL_VERSION-1+cuda10.2 \
libcublas-dev=10.2.2.89-1 \
&& \
rm -rf /var/lib/apt/lists/*

ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs

#
# cuDNN 7.6.5.32 devel
#
# https://gitlab.com/nvidia/container-images/cuda/blob/master/dist/ubuntu18.04/10.2/devel/cudnn7/Dockerfile
#
RUN apt-get update && apt-get install -y --no-install-recommends \
libcudnn7-dev=$CUDNN_VERSION-1+cuda10.2 \
&& \
rm -rf /var/lib/apt/lists/*

#
# TensorRT 6.0.1.8 devel
#
# https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/tensorrt-601/tensorrt-install-guide/index.html#maclearn-net-repo-install
#
RUN version=$TENSORRT_VERSION-1+cuda10.2 && \
apt-get update && apt-get install -y --no-install-recommends \
libnvinfer-dev=${version} \
libnvonnxparsers-dev=${version} libnvparsers-dev=${version} \
libnvinfer-plugin-dev=${version} \
&& \
apt-mark hold \
libnvinfer-dev \
libnvonnxparsers-dev libnvparsers-dev \
libnvinfer-plugin-dev \
&& \
rm -rf /var/lib/apt/lists/*

# Install PyCUDA
RUN python3 -m pip install pycuda \
&& python3 -m pip wheel --wheel-dir install pycuda

# Install Cmake
ENV CMAKE_VERSION 3.14.4

RUN cd /tmp && \
wget https://github.com/Kitware/CMake/releases/download/v$CMAKE_VERSION/cmake-$CMAKE_VERSION-Linux-x86_64.sh && \
chmod +x cmake-$CMAKE_VERSION-Linux-x86_64.sh && \
./cmake-$CMAKE_VERSION-Linux-x86_64.sh --prefix=/usr/local --exclude-subdir --skip-license && \
rm ./cmake-$CMAKE_VERSION-Linux-x86_64.sh

# Build plugin
ADD plugin plugin/
RUN mkdir -p build \
&& cd build \
&& cmake ../plugin \
&& make \
&& cd ..

#
# Copy libraries to the final image
#
FROM base AS result

COPY --from=builder /opt/frigate/install install
COPY --from=builder /opt/frigate/build/libflattenconcat.so /usr/lib

RUN python3 -m pip install install/* \
&& rm -r install

# Get UFF model
RUN wget -q https://github.com/dusty-nv/jetson-inference/releases/download/model-mirror-190618/SSD-Mobilenet-v2.tar.gz -O /gpu_model.tar.gz && \
tar -xf /gpu_model.tar.gz -C / SSD-Mobilenet-v2/ssd_mobilenet_v2_coco.uff --strip-components 1 && \
mv /ssd_mobilenet_v2_coco.uff /gpu_model.uff && \
rm /gpu_model.tar.gz

COPY engine.py .
COPY detect_objects_gpu.py .

CMD ["python3", "-u", "detect_objects_gpu.py"]
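Note: Dockerfile.gpu starts from `FROM frigate AS base`, so the regular CPU image presumably has to be built and tagged `frigate` before the GPU image can be built, along the lines of `docker build -t frigate .` followed by `docker build -f Dockerfile.gpu -t frigate-gpu .` (the tag names here are assumptions, not taken from the repository).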
README.md: 16 lines changed
@@ -1,7 +1,7 @@
# Frigate - Realtime Object Detection for IP Cameras
Uses OpenCV and Tensorflow to perform realtime object detection locally for IP cameras. Designed for integration with HomeAssistant or others via MQTT.
Uses OpenCV and Tensorflow/TensorRT to perform realtime object detection locally for IP cameras. Designed for integration with HomeAssistant or others via MQTT.

Use of a [Google Coral USB Accelerator](https://coral.withgoogle.com/products/accelerator/) is optional, but highly recommended. On my Intel i7 processor, I can process 2-3 FPS with the CPU. The Coral can process 100+ FPS with very low CPU load.
Use of a [Google Coral USB Accelerator](https://coral.withgoogle.com/products/accelerator/) or [Nvidia CUDA GPUs](https://developer.nvidia.com/cuda-gpus) is optional, but highly recommended. On my Intel i7 processor, I can process 24 FPS with the CPU. A budget entry-level GPU processes 64 FPS, and a powerful GPU or the Coral can process 100+ FPS with very low CPU load.

- Leverages multiprocessing heavily with an emphasis on realtime over processing every frame
- Uses a very low overhead motion detection to determine where to run object detection

@@ -29,6 +29,10 @@ docker run --rm \
  blakeblackshear/frigate:stable
```

To run the GPU accelerated `frigate-gpu` Docker image, use the [NVIDIA Container Toolkit](https://github.com/NVIDIA/nvidia-docker/wiki/Installation-(Native-GPU-Support)).
If your GPU supports half precision (also known as FP16), you can boost performance by enabling this mode as follows:
`docker run --gpus all --env TRT_FLOAT_PRECISION=16 ...`

Example docker-compose:
```yaml
frigate:

@@ -47,6 +51,8 @@ Example docker-compose:
      FRIGATE_RTSP_PASSWORD: "password"
```

Please note that native GPU support has not landed in docker-compose [yet](https://github.com/docker/compose/issues/6691).

A `config.yml` file must exist in the `config` directory. See the example [here](config/config.example.yml); device specific info can be found [here](docs/DEVICES.md).

Access the mjpeg stream at `http://localhost:5000/<camera_name>` and the best snapshot for any object type at `http://localhost:5000/<camera_name>/<object_name>/best.jpg`

@@ -118,10 +124,12 @@ sensor:
      unit_of_measurement: 'ms'
```
## Using a custom model
Models for both CPU and EdgeTPU (Coral) are bundled in the image. You can use your own models with volume mounts:
- CPU Model: `/cpu_model.tflite`
Models for CPU/GPU and EdgeTPU (Coral) are bundled in the images. You can use your own models with volume mounts:
- CPU Model: `/cpu_model.pb`
- GPU Model: `/gpu_model.uff`
- EdgeTPU Model: `/edgetpu_model.tflite`
- Labels: `/labelmap.txt`

## Tips
- Lower the framerate of the video feed on the camera to reduce the CPU usage for capturing the feed
- Choose a smaller camera resolution, as the images are resized to the model's 300x300 input shape anyway
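As a concrete illustration of the volume mounts listed under "Using a custom model" above, a custom CPU or GPU model could presumably be supplied with something like `docker run ... -v /path/to/frozen_inference_graph.pb:/cpu_model.pb -v /path/to/model.uff:/gpu_model.uff -v /path/to/labelmap.txt:/labelmap.txt ...`; the host-side paths are made up for the example, only the in-container paths come from the README.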
benchmark.py: 0 lines changed (Executable file → Normal file)
detect_objects_gpu.py (new file): 14 lines
@@ -0,0 +1,14 @@
import os
import subprocess

if __name__ == '__main__':
    if not os.path.isfile('/gpu_model.buf'):
        engine = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'engine.py')
        subprocess.run(['python3', '-u', engine,
                        '-i', '/gpu_model.uff',
                        '-o', '/gpu_model.buf',
                        '-p', os.getenv('TRT_FLOAT_PRECISION', '32')
                        ], check=True)

    from detect_objects import main as detect_objects_main
    detect_objects_main()
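For reference, the subprocess call above is presumably equivalent to running `python3 -u engine.py -i /gpu_model.uff -o /gpu_model.buf -p 16` (or `-p 32`, the default) by hand inside the container, with the `-p` value coming from the `TRT_FLOAT_PRECISION` environment variable that the README's `docker run --gpus all --env TRT_FLOAT_PRECISION=16 ...` example sets.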
engine.py (new file): 87 lines
@@ -0,0 +1,87 @@
import ctypes
import argparse
import sys
import os
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)


def model_input_shape():
    return 3, 300, 300


def build_engine(uff_model_path, trt_engine_datatype=trt.DataType.FLOAT, batch_size=1):
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network() as network, \
            trt.UffParser() as parser:
        builder.max_workspace_size = 1 << 30
        builder.max_batch_size = batch_size
        if trt_engine_datatype == trt.DataType.HALF:
            builder.fp16_mode = True

        parser.register_input("Input", model_input_shape())
        parser.register_output("MarkOutput_0")
        parser.parse(uff_model_path, network)

        return builder.build_cuda_engine(network)


def save_engine(engine, engine_dest_path):
    os.makedirs(os.path.dirname(engine_dest_path), exist_ok=True)
    buf = engine.serialize()
    with open(engine_dest_path, 'wb') as f:
        f.write(buf)


def load_engine(trt_runtime, engine_path):
    with open(engine_path, 'rb') as f:
        engine_data = f.read()
    engine = trt_runtime.deserialize_cuda_engine(engine_data)
    return engine


def load_plugins():
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')

    try:
        ctypes.CDLL('libflattenconcat.so')
    except Exception as e:
        print("Error: {}\n{}".format(e, "Make sure FlattenConcat custom plugin layer is provided"))
        sys.exit(1)


TRT_PRECISION_TO_DATATYPE = {
    16: trt.DataType.HALF,
    32: trt.DataType.FLOAT
}

if __name__ == '__main__':
    # Define script command line arguments
    parser = argparse.ArgumentParser(description='Utility to build TensorRT engine prior to inference.')
    parser.add_argument('-i', "--input",
                        dest='uff_model_path', metavar='UFF_MODEL_PATH', required=True,
                        help='preprocessed TensorFlow model in UFF format')
    parser.add_argument('-p', '--precision', type=int, choices=[32, 16], default=32,
                        help='desired TensorRT float precision to build an engine with')
    parser.add_argument('-b', '--batch_size', type=int, default=1,
                        help='max TensorRT engine batch size')
    parser.add_argument("-o", "--output", dest='trt_engine_path',
                        help="path of the output file",
                        default=os.path.join(os.path.dirname(os.path.abspath(__file__)), "engine.buf"))

    # Parse arguments passed
    args = parser.parse_args()

    load_plugins()

    # Using supplied .uff file alongside with UffParser build TensorRT engine
    print("Building TensorRT engine. This may take few minutes.")
    trt_engine = build_engine(
        uff_model_path=args.uff_model_path,
        trt_engine_datatype=TRT_PRECISION_TO_DATATYPE[args.precision],
        batch_size=args.batch_size)

    # Save the engine to file
    save_engine(trt_engine, args.trt_engine_path)
    print("TensorRT engine saved to {}".format(args.trt_engine_path))
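A minimal sketch of using engine.py programmatically instead of through the subprocess call in detect_objects_gpu.py; the paths mirror the Dockerfile.gpu defaults, and the snippet assumes TensorRT 6 plus the FlattenConcat plugin library are installed, so treat it as an illustration rather than part of the diff:

```python
import tensorrt as trt

import engine

# Register the standard libnvinfer plugins and dlopen libflattenconcat.so
engine.load_plugins()

# Build an FP16 engine from the UFF model (equivalent to TRT_FLOAT_PRECISION=16)
trt_engine = engine.build_engine(
    '/gpu_model.uff',
    trt_engine_datatype=trt.DataType.HALF,
    batch_size=1)

# Serialize it to the path that frigate/tensorrtgpu.py loads at startup
engine.save_engine(trt_engine, '/gpu_model.buf')
```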
frigate/edgetpu.py

@@ -7,6 +7,12 @@ import pyarrow.plasma as plasma
import tflite_runtime.interpreter as tflite
from tflite_runtime.interpreter import load_delegate
from frigate.util import EventsPerSecond, listen
from frigate.tensorflowcpu import ObjectDetector as CPUObjectDetector
try:
    import pycuda.driver as cuda
    from frigate.tensorrtgpu import ObjectDetector as GPUObjectDetector
except ImportError:
    pass

def load_labels(path, encoding='utf-8'):
    """Loads labels from file (with or without index numbers).

@@ -28,26 +34,22 @@ def load_labels(path, encoding='utf-8'):
    return {index: line.strip() for index, line in enumerate(lines)}

class ObjectDetector():
    def __init__(self):
        edge_tpu_delegate = None
        try:
            edge_tpu_delegate = load_delegate('libedgetpu.so.1.0')
        except ValueError:
            print("No EdgeTPU detected. Falling back to CPU.")

        if edge_tpu_delegate is None:
            self.interpreter = tflite.Interpreter(
                model_path='/cpu_model.tflite')
        else:
            self.interpreter = tflite.Interpreter(
                model_path='/edgetpu_model.tflite',
                experimental_delegates=[edge_tpu_delegate])

    def __init__(self, edge_tpu_delegate):
        self.interpreter = tflite.Interpreter(
            model_path='/edgetpu_model.tflite',
            experimental_delegates=[edge_tpu_delegate])

        self.interpreter.allocate_tensors()

        self.tensor_input_details = self.interpreter.get_input_details()
        self.tensor_output_details = self.interpreter.get_output_details()


    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        pass

    def detect_raw(self, tensor_input):
        self.interpreter.set_tensor(self.tensor_input_details[0]['index'], tensor_input)
        self.interpreter.invoke()

@@ -57,34 +59,62 @@ class ObjectDetector():

        detections = np.zeros((20,6), np.float32)
        for i, score in enumerate(scores):
            if i == detections.shape[0]:
                break
            detections[i] = [label_codes[i], score, boxes[i][0], boxes[i][1], boxes[i][2], boxes[i][3]]


        return detections

def create_object_detector():
    edge_tpu_delegate = None
    try:
        edge_tpu_delegate = load_delegate('libedgetpu.so.1.0')
    except ValueError:
        pass

    if edge_tpu_delegate is not None:
        return ObjectDetector(edge_tpu_delegate)

    gpu_device_count = 0
    try:
        cuda.init()
        gpu_device_count = cuda.Device.count()
    except (RuntimeError, TypeError, NameError):
        pass
    except cuda.RuntimeError:
        pass

    if gpu_device_count > 0:
        print("No EdgeTPU detected. Falling back to GPU.")
        return GPUObjectDetector()

    print("No EdgeTPU or GPU detected. Falling back to CPU.")
    return CPUObjectDetector()

def run_detector(detection_queue, avg_speed, start):
    print(f"Starting detection process: {os.getpid()}")
    listen()
    plasma_client = plasma.connect("/tmp/plasma")
    object_detector = ObjectDetector()

    while True:
        object_id_str = detection_queue.get()
        object_id_hash = hashlib.sha1(str.encode(object_id_str))
        object_id = plasma.ObjectID(object_id_hash.digest())
        object_id_out = plasma.ObjectID(hashlib.sha1(str.encode(f"out-{object_id_str}")).digest())
        input_frame = plasma_client.get(object_id, timeout_ms=0)
    with create_object_detector() as object_detector:
        while True:
            object_id_str = detection_queue.get()
            object_id_hash = hashlib.sha1(str.encode(object_id_str))
            object_id = plasma.ObjectID(object_id_hash.digest())
            object_id_out = plasma.ObjectID(hashlib.sha1(str.encode(f"out-{object_id_str}")).digest())
            input_frame = plasma_client.get(object_id, timeout_ms=0)

        if input_frame is plasma.ObjectNotAvailable:
            continue
            if input_frame is plasma.ObjectNotAvailable:
                continue

        # detect and put the output in the plasma store
        start.value = datetime.datetime.now().timestamp()
        plasma_client.put(object_detector.detect_raw(input_frame), object_id_out)
        duration = datetime.datetime.now().timestamp()-start.value
        start.value = 0.0
            # detect and put the output in the plasma store
            start.value = datetime.datetime.now().timestamp()
            plasma_client.put(object_detector.detect_raw(input_frame), object_id_out)
            duration = datetime.datetime.now().timestamp()-start.value
            start.value = 0.0

        avg_speed.value = (avg_speed.value*9 + duration)/10

            avg_speed.value = (avg_speed.value*9 + duration)/10

class EdgeTPUProcess():
    def __init__(self):
        self.detection_queue = mp.SimpleQueue()

@@ -114,7 +144,7 @@ class RemoteObjectDetector():
        self.fps = EventsPerSecond()
        self.plasma_client = plasma.connect("/tmp/plasma")
        self.detection_queue = detection_queue


    def detect(self, tensor_input, threshold=.4):
        detections = []

@@ -139,4 +169,4 @@ class RemoteObjectDetector():
            ))
        self.plasma_client.delete([object_id_frame, object_id_detections])
        self.fps.update()
        return detections
        return detections
frigate/tensorflowcpu.py (new file): 51 lines
@@ -0,0 +1,51 @@
import numpy as np
import tensorflow as tf


class ObjectDetector():
    def __init__(self):
        self.detection_graph = tf.Graph()
        with self.detection_graph.as_default():
            od_graph_def = tf.compat.v1.GraphDef()
            with tf.io.gfile.GFile('/cpu_model.pb', 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')

        config = tf.compat.v1.ConfigProto(
            device_count={'GPU': 0}
        )
        self.sess = tf.compat.v1.Session(
            graph=self.detection_graph,
            config=config)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        pass

    def detect_raw(self, tensor_input):
        ops = self.detection_graph.get_operations()
        all_tensor_names = {output.name for op in ops for output in op.outputs}
        tensor_dict = {}
        for key in ['detection_boxes', 'detection_scores', 'detection_classes']:
            tensor_name = key + ':0'
            if tensor_name in all_tensor_names:
                tensor_dict[key] = self.detection_graph.get_tensor_by_name(tensor_name)

        image_tensor = self.detection_graph.get_tensor_by_name('image_tensor:0')
        output_dict = self.sess.run(tensor_dict,
                                    feed_dict={image_tensor: tensor_input})

        boxes = output_dict['detection_boxes'][0]
        label_codes = output_dict['detection_classes'][0] - 1
        scores = output_dict['detection_scores'][0]

        detections = np.zeros((20, 6), np.float32)
        for i, score in enumerate(scores):
            if i == detections.shape[0]:
                break
            detections[i] = [label_codes[i], score, boxes[i][0], boxes[i][1], boxes[i][2], boxes[i][3]]

        return detections
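All three detectors (EdgeTPU, CPU and GPU) return the same fixed-shape (20, 6) float32 array where each row is [label_code, score, ymin, xmin, ymax, xmax] and unused rows stay zero. A small sketch of turning that array into labelled results, assuming `labels` comes from `load_labels('/labelmap.txt')` in frigate/edgetpu.py and reusing the 0.4 threshold that `RemoteObjectDetector.detect` defaults to; the helper itself is not part of the diff:

```python
def decode_detections(detections, labels, threshold=0.4):
    """Convert a (20, 6) detect_raw() array into (label, score, box) tuples."""
    results = []
    for label_code, score, ymin, xmin, ymax, xmax in detections:
        if score < threshold:
            continue  # rows below the threshold (including zero padding) are skipped
        results.append((labels.get(int(label_code), 'unknown'),
                        float(score),
                        (float(ymin), float(xmin), float(ymax), float(xmax))))
    return results
```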
frigate/tensorrtgpu.py (new file): 96 lines
@@ -0,0 +1,96 @@
import numpy as np
import pycuda.driver as cuda
import tensorrt as trt
import engine
from collections import namedtuple
from pycuda.tools import make_default_context
from pycuda.tools import clear_context_caches

HostDeviceMem = namedtuple('HostDeviceMem', 'host device')


class ObjectDetector():
    def __init__(self):
        self.context = make_default_context()
        self.device = self.context.get_device()

        engine.load_plugins()

        self.trt_runtime = trt.Runtime(engine.TRT_LOGGER)
        self.trt_engine = engine.load_engine(self.trt_runtime, '/gpu_model.buf')

        self._allocate_buffers()
        self.execution_context = self.trt_engine.create_execution_context()

        input_volume = trt.volume(engine.model_input_shape())
        self.numpy_array = np.zeros((self.trt_engine.max_batch_size, input_volume))

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.context.pop()
        self.context = None

        clear_context_caches()

    def detect_raw(self, tensor_input):
        # HWC -> CHW
        img_np = tensor_input.transpose((0, 3, 1, 2))
        # Normalize to [-1.0, 1.0] interval (expected by model)
        img_np = (2.0 / 255.0) * img_np - 1.0
        img_np = img_np.ravel()

        np.copyto(self.inputs[0].host, img_np)
        detection_out, keep_count_out = self._do_inference()

        detections = np.zeros((20, 6), np.float32)
        for i in range(int(keep_count_out[0])):
            if i == detections.shape[0]:
                break
            pred_start_idx = i * 7
            label = detection_out[pred_start_idx + 1] - 1
            score = detection_out[pred_start_idx + 2]
            xmin = detection_out[pred_start_idx + 3]
            ymin = detection_out[pred_start_idx + 4]
            xmax = detection_out[pred_start_idx + 5]
            ymax = detection_out[pred_start_idx + 6]
            detections[i] = [label, score, ymin, xmin, ymax, xmax]

        return detections

    def _do_inference(self):
        [cuda.memcpy_htod_async(inp.device, inp.host, self.stream) for inp in self.inputs]
        self.execution_context.execute_async(batch_size=self.trt_engine.max_batch_size,
                                             bindings=self.bindings,
                                             stream_handle=self.stream.handle)
        [cuda.memcpy_dtoh_async(out.host, out.device, self.stream) for out in self.outputs]
        self.stream.synchronize()
        return [out.host for out in self.outputs]

    def _allocate_buffers(self):
        self.inputs = []
        self.outputs = []
        self.bindings = []
        self.stream = cuda.Stream()

        # NMS implementation in TRT 6 only supports DataType.FLOAT
        binding_to_type = {"Input": np.float32,
                           "NMS": np.float32,
                           "NMS_1": np.int32}
        for binding in self.trt_engine:
            size = trt.volume(self.trt_engine.get_binding_shape(binding)) * self.trt_engine.max_batch_size
            dtype = binding_to_type[str(binding)]

            # Allocate host and device buffers
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)

            # Append the device buffer to device bindings.
            self.bindings.append(int(device_mem))

            # Append to the appropriate list.
            if self.trt_engine.binding_is_input(binding):
                self.inputs.append(HostDeviceMem(host_mem, device_mem))
            else:
                self.outputs.append(HostDeviceMem(host_mem, device_mem))
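A minimal usage sketch for the TensorRT detector, assuming `/gpu_model.buf` has already been built and `libflattenconcat.so` is on the loader path; in the real pipeline this class is obtained through `create_object_detector()` in frigate/edgetpu.py rather than instantiated directly:

```python
import numpy as np

from frigate.tensorrtgpu import ObjectDetector

# One 300x300 RGB frame in NHWC layout; frigate resizes camera frames to this shape
frame = np.zeros((1, 300, 300, 3), dtype=np.uint8)

# The context manager matters: __exit__ pops the CUDA context created in __init__
with ObjectDetector() as detector:
    detections = detector.detect_raw(frame)  # (20, 6) rows of [label, score, ymin, xmin, ymax, xmax]
```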
frigate/util.py: 0 lines changed (Executable file → Normal file)
frigate/video.py: 0 lines changed (Executable file → Normal file)
plugin/CMakeLists.txt (new file): 47 lines
@@ -0,0 +1,47 @@
cmake_minimum_required(VERSION 3.2 FATAL_ERROR)

project(FlattenConcat LANGUAGES CXX)

# Enable all compile warnings
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-long-long -pedantic -Wno-deprecated-declarations")
# Use C++11
set (CMAKE_CXX_STANDARD 11)

# Sets variable to a value if variable is unset.
macro(set_ifndef var val)
    if (NOT ${var})
        set(${var} ${val})
    endif()
    message(STATUS "Configurable variable ${var} set to ${${var}}")
endmacro()

# -------- CONFIGURATION --------
find_package(CUDA REQUIRED)

set_ifndef(TRT_LIB /usr/lib/x86_64-linux-gnu)
set_ifndef(TRT_INCLUDE /usr/include/x86_64-linux-gnu)
set_ifndef(CUDA_ROOT /usr/local/cuda)

# Find dependencies:
message("\nThe following variables are derived from the values of the previous variables unless provided explicitly:\n")

# TensorRT's nvinfer lib
find_library(_NVINFER_LIB nvinfer HINTS ${TRT_LIB} PATH_SUFFIXES lib lib64)
set_ifndef(NVINFER_LIB ${_NVINFER_LIB})

# cuBLAS
find_library(_CUBLAS_LIB cublas HINTS ${CUDA_ROOT} PATH_SUFFIXES lib lib64)
set_ifndef(CUBLAS_LIB ${_CUBLAS_LIB})

# CUDA include dir
find_path(_CUDA_INC_DIR cuda_runtime_api.h HINTS ${CUDA_ROOT} PATH_SUFFIXES include)
set_ifndef(CUDA_INC_DIR ${_CUDA_INC_DIR})

# -------- BUILDING --------
include_directories(${TRT_INCLUDE} ${CUDA_INC_DIR})
add_library(flattenconcat MODULE
    ${CMAKE_SOURCE_DIR}/FlattenConcat.cpp
)

# Link TensorRT's nvinfer lib
target_link_libraries(flattenconcat PRIVATE ${NVINFER_LIB} ${CUBLAS_LIB})
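This is the CMake project that the builder stage in Dockerfile.gpu compiles with `cmake ../plugin && make`; the resulting `libflattenconcat.so` is copied to `/usr/lib` in the final image, which is how the `ctypes.CDLL('libflattenconcat.so')` call in engine.py's `load_plugins()` finds it.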
plugin/FlattenConcat.cpp (new file): 320 lines
@@ -0,0 +1,320 @@
/*
 * The TensorFlow SSD graph has some operations that are currently not supported in TensorRT.
 * Using a preprocessor on the graph, multiple operations in the graph are combined into a
 * single custom operation which is implemented as a plugin layer in TensorRT. The preprocessor
 * stitches all nodes within a namespace into one custom node.
 *
 * The plugin called `FlattenConcat` is used to flatten each input and then concatenate the
 * results. This is applied to the location and confidence data before it is fed to the post
 * processor step.
 *
 * Loading FlattenConcat plugin library using CDLL has a side effect of loading FlattenConcat
 * plugin into internal TensorRT plugin registry: the latter has FlattenConcat shipped with
 * TensorRT, while we load own version. There are subtle differences between built-in
 * FlattenConcat and this one.
 *
 * The pre-trained TensorFlow model has been converted to UFF format using this FlattenConcat
 * plugin and we have to stick to it when building a TensorRT inference engine. To avoid collision
 * with built-in plugin of the same name of version "1" we set version "B" and load it the last.
 */

#include <algorithm>
#include <cassert>
#include <iostream>
#include <numeric>
#include <vector>

#include <cublas_v2.h>

#include "NvInferPlugin.h"

// Macro for calling GPU functions
#define CHECK(status)                             \
    do                                            \
    {                                             \
        auto ret = (status);                      \
        if (ret != 0)                             \
        {                                         \
            std::cout << "Cuda failure: " << ret; \
            abort();                              \
        }                                         \
    } while (0)

using namespace nvinfer1;

namespace
{
const char* FLATTENCONCAT_PLUGIN_VERSION{"B"};
const char* FLATTENCONCAT_PLUGIN_NAME{"FlattenConcat_TRT"};
}

// Flattens all input tensors and concats their flattened version together
// along the major non-batch dimension, i.e axis = 1
class FlattenConcat : public IPluginV2
{
public:
    // Ordinary ctor, plugin not yet configured for particular inputs/output
    FlattenConcat() {}

    // Ctor for clone()
    FlattenConcat(const int* flattenedInputSize, int numInputs, int flattenedOutputSize)
        : mFlattenedOutputSize(flattenedOutputSize)
    {
        for (int i = 0; i < numInputs; ++i)
            mFlattenedInputSize.push_back(flattenedInputSize[i]);
    }

    // Ctor for loading from serialized byte array
    FlattenConcat(const void* data, size_t length)
    {
        const char* d = reinterpret_cast<const char*>(data);
        const char* a = d;

        size_t numInputs = read<size_t>(d);
        for (size_t i = 0; i < numInputs; ++i)
        {
            mFlattenedInputSize.push_back(read<int>(d));
        }
        mFlattenedOutputSize = read<int>(d);

        assert(d == a + length);
    }

    int getNbOutputs() const override
    {
        // We always return one output
        return 1;
    }

    Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override
    {
        // At least one input
        assert(nbInputDims >= 1);
        // We only have one output, so it doesn't
        // make sense to check index != 0
        assert(index == 0);

        size_t flattenedOutputSize = 0;
        int inputVolume = 0;

        for (int i = 0; i < nbInputDims; ++i)
        {
            // We only support NCHW. And inputs Dims are without batch num.
            assert(inputs[i].nbDims == 3);

            inputVolume = inputs[i].d[0] * inputs[i].d[1] * inputs[i].d[2];
            flattenedOutputSize += inputVolume;
        }

        return DimsCHW(flattenedOutputSize, 1, 1);
    }

    int initialize() override
    {
        // Called on engine initialization, we initialize cuBLAS library here,
        // since we'll be using it for inference
        CHECK(cublasCreate(&mCublas));
        return 0;
    }

    void terminate() override
    {
        // Called on engine destruction, we destroy cuBLAS data structures,
        // which were created in initialize()
        CHECK(cublasDestroy(mCublas));
    }

    size_t getWorkspaceSize(int maxBatchSize) const override
    {
        // The operation is done in place, it doesn't use GPU memory
        return 0;
    }

    int enqueue(int batchSize, const void* const* inputs, void** outputs, void*, cudaStream_t stream) override
    {
        // Does the actual concat of inputs, which is just
        // copying all inputs bytes to output byte array
        size_t inputOffset = 0;
        float* output = reinterpret_cast<float*>(outputs[0]);
        cublasSetStream(mCublas, stream);

        for (size_t i = 0; i < mFlattenedInputSize.size(); ++i)
        {
            const float* input = reinterpret_cast<const float*>(inputs[i]);
            for (int batchIdx = 0; batchIdx < batchSize; ++batchIdx)
            {
                CHECK(cublasScopy(mCublas, mFlattenedInputSize[i],
                                  input + batchIdx * mFlattenedInputSize[i], 1,
                                  output + (batchIdx * mFlattenedOutputSize + inputOffset), 1));
            }
            inputOffset += mFlattenedInputSize[i];
        }

        return 0;
    }

    size_t getSerializationSize() const override
    {
        // Returns FlattenConcat plugin serialization size
        size_t size = sizeof(mFlattenedInputSize[0]) * mFlattenedInputSize.size()
            + sizeof(mFlattenedOutputSize)
            + sizeof(size_t); // For serializing mFlattenedInputSize vector size
        return size;
    }

    void serialize(void* buffer) const override
    {
        // Serializes FlattenConcat plugin into byte array

        // Cast buffer to char* and save its beginning to a,
        // (since value of d will be changed during write)
        char* d = reinterpret_cast<char*>(buffer);
        char* a = d;

        size_t numInputs = mFlattenedInputSize.size();

        // Write FlattenConcat fields into buffer
        write(d, numInputs);
        for (size_t i = 0; i < numInputs; ++i)
        {
            write(d, mFlattenedInputSize[i]);
        }
        write(d, mFlattenedOutputSize);

        // Sanity check - checks if d is offset
        // from a by exactly the size of serialized plugin
        assert(d == a + getSerializationSize());
    }

    void configureWithFormat(const Dims* inputs, int nbInputs, const Dims* outputDims, int nbOutputs, nvinfer1::DataType type, nvinfer1::PluginFormat format, int maxBatchSize) override
    {
        // We only support one output
        assert(nbOutputs == 1);

        // Reset plugin private data structures
        mFlattenedInputSize.clear();
        mFlattenedOutputSize = 0;

        // For each input we save its size, we also validate it
        for (int i = 0; i < nbInputs; ++i)
        {
            int inputVolume = 0;

            // We only support NCHW. And inputs Dims are without batch num.
            assert(inputs[i].nbDims == 3);

            // All inputs dimensions along non concat axis should be same
            for (size_t dim = 1; dim < 3; dim++)
            {
                assert(inputs[i].d[dim] == inputs[0].d[dim]);
            }

            // Size of flattened input
            inputVolume = inputs[i].d[0] * inputs[i].d[1] * inputs[i].d[2];
            mFlattenedInputSize.push_back(inputVolume);
            mFlattenedOutputSize += mFlattenedInputSize[i];
        }
    }

    bool supportsFormat(DataType type, PluginFormat format) const override
    {
        return (type == DataType::kFLOAT && format == PluginFormat::kNCHW);
    }

    const char* getPluginType() const override { return FLATTENCONCAT_PLUGIN_NAME; }

    const char* getPluginVersion() const override { return FLATTENCONCAT_PLUGIN_VERSION; }

    void destroy() override {}

    IPluginV2* clone() const override
    {
        return new FlattenConcat(mFlattenedInputSize.data(), mFlattenedInputSize.size(), mFlattenedOutputSize);
    }

    void setPluginNamespace(const char* pluginNamespace) override
    {
        mPluginNamespace = pluginNamespace;
    }

    const char* getPluginNamespace() const override
    {
        return mPluginNamespace.c_str();
    }

private:
    template <typename T>
    void write(char*& buffer, const T& val) const
    {
        *reinterpret_cast<T*>(buffer) = val;
        buffer += sizeof(T);
    }

    template <typename T>
    T read(const char*& buffer)
    {
        T val = *reinterpret_cast<const T*>(buffer);
        buffer += sizeof(T);
        return val;
    }

    // Number of elements in each plugin input, flattened
    std::vector<int> mFlattenedInputSize;
    // Number of elements in output, flattened
    int mFlattenedOutputSize{0};
    // cuBLAS library handle
    cublasHandle_t mCublas;
    // We're not using TensorRT namespaces in
    // this sample, so it's just an empty string
    std::string mPluginNamespace = "";
};

// PluginCreator boilerplate code for FlattenConcat plugin
class FlattenConcatPluginCreator : public IPluginCreator
{
public:
    FlattenConcatPluginCreator()
    {
        mFC.nbFields = 0;
        mFC.fields = 0;
    }

    ~FlattenConcatPluginCreator() {}

    const char* getPluginName() const override { return FLATTENCONCAT_PLUGIN_NAME; }

    const char* getPluginVersion() const override { return FLATTENCONCAT_PLUGIN_VERSION; }

    const PluginFieldCollection* getFieldNames() override { return &mFC; }

    IPluginV2* createPlugin(const char* name, const PluginFieldCollection* fc) override
    {
        return new FlattenConcat();
    }

    IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override
    {
        return new FlattenConcat(serialData, serialLength);
    }

    void setPluginNamespace(const char* pluginNamespace) override
    {
        mPluginNamespace = pluginNamespace;
    }

    const char* getPluginNamespace() const override
    {
        return mPluginNamespace.c_str();
    }

private:
    static PluginFieldCollection mFC;
    static std::vector<PluginField> mPluginAttributes;
    std::string mPluginNamespace = "";
};

PluginFieldCollection FlattenConcatPluginCreator::mFC{};
std::vector<PluginField> FlattenConcatPluginCreator::mPluginAttributes;

REGISTER_TENSORRT_PLUGIN(FlattenConcatPluginCreator);
requirements.txt (new file): 13 lines
@@ -0,0 +1,13 @@
numpy
imutils
scipy
Flask
paho-mqtt
PyYAML
matplotlib
pyarrow
pycuda
tensorrt
opencv-python
tensorflow
tflite_runtime