Repository: bdtinc/maskcam
Branch: main
Commit: 4841c2c49235
Files: 106
Total size: 388.6 KB
Directory structure:
gitextract_82ysmahp/
├── .gitignore
├── Dockerfile
├── LICENSE.md
├── README.md
├── deepstream_plugin_yolov4/
│ ├── Makefile
│ ├── README.md
│ ├── kernels.cu
│ ├── nvdsinfer_yolo_engine.cpp
│ ├── nvdsparsebbox_Yolo.cpp
│ ├── trt_utils.cpp
│ ├── trt_utils.h
│ ├── yolo.cpp
│ ├── yolo.h
│ ├── yoloPlugins.cpp
│ └── yoloPlugins.h
├── docker/
│ ├── constraints.docker
│ ├── opencv_python-3.2.0.egg-info
│ ├── scikit-learn-0.19.1.egg-info
│ └── start.sh
├── docker-compose.yml
├── docs/
│ ├── BalenaOS-DevKit-Nano-Setup.md
│ ├── BalenaOS-Photon-Nano-Setup.md
│ ├── Custom-Container-Development.md
│ ├── Manual-Dependencies-Installation.md
│ └── Useful-Development-Scripts.md
├── maskcam/
│ ├── common.py
│ ├── config.py
│ ├── maskcam_filesave.py
│ ├── maskcam_fileserver.py
│ ├── maskcam_inference.py
│ ├── maskcam_streaming.py
│ ├── mqtt_commander.py
│ ├── mqtt_common.py
│ ├── prints.py
│ └── utils.py
├── maskcam_config.txt
├── maskcam_run.py
├── requirements-dev.in
├── requirements.in
├── requirements.txt
├── server/
│ ├── backend/
│ │ ├── Dockerfile
│ │ ├── alembic.ini
│ │ ├── app/
│ │ │ ├── api/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── exceptions.py
│ │ │ │ └── routes/
│ │ │ │ ├── device_routes.py
│ │ │ │ └── statistic_routes.py
│ │ │ ├── core/
│ │ │ │ └── config.py
│ │ │ ├── db/
│ │ │ │ ├── cruds/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── crud_device.py
│ │ │ │ │ ├── crud_statistic.py
│ │ │ │ │ └── crud_video_file.py
│ │ │ │ ├── migrations/
│ │ │ │ │ ├── env.py
│ │ │ │ │ ├── script.py.mako
│ │ │ │ │ └── versions/
│ │ │ │ │ ├── 6a4d853aabce_added_database.py
│ │ │ │ │ ├── 6d5c250f098c_added_device_file_server_address.py
│ │ │ │ │ ├── 8f58cd776eda_added_database.py
│ │ │ │ │ └── fb245977373f_added_video_file_table.py
│ │ │ │ ├── schema/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── base.py
│ │ │ │ │ ├── models.py
│ │ │ │ │ └── schemas.py
│ │ │ │ └── utils/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── enums.py
│ │ │ │ └── utils.py
│ │ │ ├── main.py
│ │ │ └── mqtt/
│ │ │ ├── broker.py
│ │ │ ├── publisher.py
│ │ │ └── subscriber.py
│ │ ├── prestart.sh
│ │ ├── requirements.txt
│ │ └── test_crud.py
│ ├── backend.env.template
│ ├── build_docker.sh
│ ├── database.env.template
│ ├── docker-compose.yml
│ ├── frontend/
│ │ ├── Dockerfile
│ │ ├── main.py
│ │ ├── requirements.txt
│ │ ├── session_manager.py
│ │ └── utils/
│ │ ├── api_utils.py
│ │ └── format_utils.py
│ ├── frontend.env.template
│ ├── mosquitto.conf
│ ├── server_setup.sh
│ └── stop_docker.sh
├── utils/
│ ├── combine_coco.py
│ ├── gst_capabilities.sh
│ ├── mqtt-test/
│ │ ├── broker.py
│ │ ├── publisher.py
│ │ └── suscriber.py
│ ├── onnx_fix_mobilenet.py
│ ├── remove_images_coco.py
│ ├── tf1_trt_inference.py
│ ├── tf2_trt_convert.py
│ └── tf_trt_convert.py
└── yolo/
├── config.py
├── config_images.yml
├── data/
│ ├── obj.data
│ └── obj.names
├── facemask-yolov4-tiny.cfg
├── facemask-yolov4.cfg
├── integrations/
│ └── yolo/
│ ├── detector_trt.py
│ ├── utils_pytorch.py
│ └── yolo_adaptor.py
├── run_yolo_images.py
└── train_cu90.sh
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Project particular ignores
backup/
videos/inputs/*
videos/outputs/*
images*/
*.mp4
.DS_Store
.vscode/
# Intermediate files to convert MobileNetV2 from TF->TF-TRT
utils/*.png
mobilenetv2/inference_graph_*/
mobilenetv2/converted_trt_*.pb
# Avoid weights, or engines
*.weights
*.trt
*.engine
*.onnx
# Environment files (not templates)
server/*.env
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
*.o
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
================================================
FILE: Dockerfile
================================================
# Installs maskcam on a BalenaOS container (devkit or Photon)
FROM balenalib/jetson-nano-ubuntu:20210201
# Don't prompt with any configuration questions
ENV DEBIAN_FRONTEND noninteractive
# Switch the nvidia apt source repos and
# install some utilities
RUN \
apt-get update && apt-get install -y \
lbzip2 wget tar python3 git
ENV UDEV=1
# Download and install BSP binaries for L4T 32.4.2
# This is mostly from Balena's Alan Boris at:
# https://github.com/balena-io-playground/jetson-nano-sample-new/blob/master/CUDA/Dockerfile
RUN apt-get update && apt-get install -y wget tar python3 libegl1 && \
wget https://developer.nvidia.com/embedded/L4T/r32_Release_v4.2/t210ref_release_aarch64/Tegra210_Linux_R32.4.2_aarch64.tbz2 && \
tar xf Tegra210_Linux_R32.4.2_aarch64.tbz2 && \
cd Linux_for_Tegra && \
sed -i 's/config.tbz2\"/config.tbz2\" --exclude=etc\/hosts --exclude=etc\/hostname/g' apply_binaries.sh && \
sed -i 's/install --owner=root --group=root \"${QEMU_BIN}\" \"${L4T_ROOTFS_DIR}\/usr\/bin\/\"/#install --owner=root --group=root \"${QEMU_BIN}\" \"${L4T_ROOTFS_DIR}\/usr\/bin\/\"/g' nv_tegra/nv-apply-debs.sh && \
sed -i 's/LC_ALL=C chroot . mount -t proc none \/proc/ /g' nv_tegra/nv-apply-debs.sh && \
sed -i 's/umount ${L4T_ROOTFS_DIR}\/proc/ /g' nv_tegra/nv-apply-debs.sh && \
sed -i 's/chroot . \// /g' nv_tegra/nv-apply-debs.sh && \
./apply_binaries.sh -r / --target-overlay && cd .. && \
rm -rf Tegra210_Linux_R32.4.2_aarch64.tbz2 && \
rm -rf Linux_for_Tegra && \
echo "/usr/lib/aarch64-linux-gnu/tegra" > /etc/ld.so.conf.d/nvidia-tegra.conf && \
echo "/usr/lib/aarch64-linux-gnu/tegra-egl" > /etc/ld.so.conf.d/nvidia-tegra-egl.conf && ldconfig
# Install GStreamer and remove unnecessary files
RUN apt-get install -y \
libssl1.0.0 \
libgstreamer1.0-0 \
gstreamer1.0-tools \
gstreamer1.0-plugins-good \
gstreamer1.0-plugins-bad \
gstreamer1.0-plugins-ugly \
gstreamer1.0-libav \
libgstrtspserver-1.0-0 \
libjansson4=2.11-1 \
cuda-toolkit-10-2 && \
ldconfig
RUN \
rm -rf /usr/src/nvidia/graphics_demos \
/usr/local/cuda-10.2/samples \
/usr/local/cuda-10.2/doc
# Install DeepStream
RUN apt-get install -y deepstream-5.0 && \
rm -rf /opt/nvidia/deepstream/deepstream-5.0/samples \
/usr/lib/aarch64-linux-gnu/libcudnn_static_v8.a \
/usr/lib/aarch64-linux-gnu/libcudnn_cnn_infer_static_v8.a \
/usr/lib/aarch64-linux-gnu/libnvinfer_static.a \
/usr/lib/aarch64-linux-gnu/libcudnn_adv_infer_static_v8.a \
/usr/lib/aarch64-linux-gnu/libcublas_static.a \
/usr/lib/aarch64-linux-gnu/libcudnn_adv_train_static_v8.a \
/usr/lib/aarch64-linux-gnu/libcudnn_ops_infer_static_v8.a \
/usr/lib/aarch64-linux-gnu/libcublasLt_static.a \
/usr/lib/aarch64-linux-gnu/libcudnn_cnn_train_static_v8.a \
/usr/lib/aarch64-linux-gnu/libcudnn_ops_train_static_v8.a \
/usr/lib/aarch64-linux-gnu/libmyelin_compiler_static.a \
/usr/lib/aarch64-linux-gnu/libmyelin_executor_static.a \
/usr/lib/aarch64-linux-gnu/libnvinfer_plugin_static.a && \
ldconfig
# Install system-level python3 packages
RUN apt-get update && apt-get install -y \
gir1.2-gst-rtsp-server-1.0 \
python3-pip \
python3-opencv \
python3-libnvinfer \
python3-scipy \
cython3 \
python3-sklearn \
python-gi-dev \
unzip && ldconfig
# These system-level packages don't provide egg-info files; add them manually so that pip knows about them
COPY docker/opencv_python-3.2.0.egg-info /usr/lib/python3/dist-packages/
COPY docker/scikit-learn-0.19.1.egg-info /usr/lib/python3/dist-packages/
# Install gst-python (python bindings for GStreamer)
RUN \
export GST_CFLAGS="-pthread -I/usr/include/gstreamer-1.0 -I/usr/include/glib-2.0 -I/usr/lib/x86_64-linux-gnu/glib-2.0/include" && \
export GST_LIBS="-lgstreamer-1.0 -lgobject-2.0 -lglib-2.0" && \
git clone https://github.com/GStreamer/gst-python.git && \
cd gst-python && git checkout 1a8f48a && \
./autogen.sh PYTHON=python3 && \
./configure PYTHON=python3 && \
make && make install
# Install pyds (python bindings for DeepStream)
RUN cd /opt/nvidia/deepstream/deepstream-5.0/lib && python3 setup.py install
# Upgrade here to avoid re-running on code changes
RUN pip3 install --upgrade pip
# ---- Below steps are run before copying full maskcam code to allow layer caching ----
# Compile YOLOv4 plugin for DeepStream
COPY deepstream_plugin_yolov4 /deepstream_plugin_yolov4
ENV CUDA_VER=10.2
RUN cd /deepstream_plugin_yolov4 && make
# Get TensorRT engine (pretrained YOLOv4-tiny)
# Model trained on smaller dataset
# RUN wget -P / https://maskcam.s3.us-east-2.amazonaws.com/facemask_y4tiny_1024_608_fp16.trt
# Model trained on bigger dataset, merged with MAFA, WiderFace, Kaggle Medical Masks and FDDB
RUN wget -P / https://maskcam.s3.us-east-2.amazonaws.com/maskcam_y4t_1024_608_fp16.trt
# RUN wget -P / https://maskcam.s3.us-east-2.amazonaws.com/maskcam_y4t_1120_640_fp16.trt
# Install requirements with pinned versions
COPY requirements.txt /maskcam_requirements.txt
RUN pip3 install -r /maskcam_requirements.txt
# ---- Note: all layers below this will be re-generated each time code changes ----
# Copy full maskcam code
COPY . /opt/maskcam_1.0/
WORKDIR /opt/maskcam_1.0
# Move pre-copied files to their maskcam location
# NOTE: Ignoring errors with `exit 0` to avoid breaking on balena livepush
RUN rm -r deepstream_plugin_yolov4 && mv /deepstream_plugin_yolov4 . ; exit 0
RUN mv /*.trt yolo/ ; exit 0
# Preload library to avoid Gst errors "cannot allocate memory in static TLS block"
ENV LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libgomp.so.1
# Un-pinned versions of maskcam requirements (comment pip3 install above before this)
# RUN pip3 install -r requirements.in -c docker/constraints.docker
RUN chmod +x docker/start.sh
RUN chmod +x maskcam_run.py
CMD ["docker/start.sh"]
================================================
FILE: LICENSE.md
================================================
MIT License
Copyright (c) 2020, 2021 Berkeley Design Technology, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
================================================
FILE: README.md
================================================
# MaskCam
MaskCam is a prototype reference design for a Jetson Nano-based smart camera system that measures crowd face mask usage in real-time, with all AI computation performed at the edge. MaskCam detects and tracks people in its field of view and determines whether they are wearing a mask via an object detection, tracking, and voting algorithm. It uploads statistics (not videos) to the cloud, where a web GUI can be used to monitor face mask compliance in the field of view. It saves interesting video snippets to local disk (e.g., a sudden influx of lots of people not wearing masks) and can optionally stream video via RTSP.
MaskCam can be run on a Jetson Nano Developer Kit, or on a Jetson Nano module (SOM) with the ConnectTech Photon carrier board. It was designed to use the Raspberry Pi High Quality Camera but will also work with pretty much any USB webcam that is supported on Linux.
The on-device software stack is mostly written in Python and runs under JetPack 4.4.1 or 4.5. Edge AI processing is handled by NVIDIA’s DeepStream video analytics framework, YOLOv4-tiny, and Tryolabs' [Norfair](https://github.com/tryolabs/norfair) tracker. MaskCam reports statistics to and receives commands from the cloud using MQTT and a web-based GUI. The software is containerized and for evaluation can be easily installed on a Jetson Nano DevKit using docker with just a couple of commands. For production, MaskCam can run under balenaOS, which makes it easy to manage and deploy multiple devices.
We urge you to try it out! It’s easy to install on a Jetson Nano Developer Kit and requires only a web cam. (The cloud-based statistics server and web GUI are optional, but are also dockerized and easy to install on any reasonable Linux system.) [See below for installation instructions.](#running-maskcam-from-a-container-on-a-jetson-nano-developer-kit)
MaskCam was developed by Berkeley Design Technology, Inc. (BDTI) and Tryolabs S.A., with development funded by NVIDIA. MaskCam is offered under the MIT License. For more information about MaskCam, please see the [report from BDTI](https://www.bdti.com/maskcam). If you have questions, please email us at maskcam@bdti.com. Thanks!
## Table of contents
- [Start Here!](#start-here)
- [Running MaskCam from a Container on a Jetson Nano Developer Kit](#running-maskcam-from-a-container-on-a-jetson-nano-developer-kit)
- [Viewing the Live Video Stream](#viewing-the-live-video-stream)
- [Setting Device Configuration Parameters](#setting-device-configuration-parameters)
- [MQTT and Web Server Setup](#mqtt-and-web-server-setup)
- [Running the MQTT Broker and Web Server](#running-the-mqtt-broker-and-web-server)
- [Setup a Device with Your Server](#setup-a-device-with-your-server)
- [Checking MQTT Connection](#checking-mqtt-connection)
- [Working With the MaskCam Container](#working-with-the-maskcam-container)
- [Development Mode: Manually Running MaskCam](#development-mode-manually-running-maskcam)
- [Debugging: Running MaskCam Modules as Standalone Processes](#debugging-running-maskcam-modules-as-standalone-processes)
- [Additional Information](#additional-information)
- [Running on Jetson Nano Developer Kit Using BalenaOS](#running-on-jetson-nano-developer-kit-using-balenaos)
- [Custom Container Development](#custom-container-development)
- [Building From Source on Jetson Nano Developer Kit](#building-from-source-on-jetson-nano-developer-kit)
- [Using Your Own Detection Model](#using-your-own-detection-model)
- [Installing MaskCam Manually (Without a Container)](#installing-maskcam-manually-without-a-container)
- [Running on Jetson Nano with Photon Carrier Board](#running-on-jetson-nano-with-photon-carrier-board)
- [Useful Development Scripts](#useful-development-scripts)
- [Troubleshooting Common Errors](#troubleshooting-common-errors)
## Start Here!
### Running MaskCam from a Container on a Jetson Nano Developer Kit
The easiest and fastest way to get MaskCam running on your Jetson Nano Dev Kit is using our pre-built containers. You will need:
1. A Jetson Nano Dev Kit running JetPack 4.4.1 or 4.5
2. An external DC 5 volt, 4 amp power supply connected through the Dev Kit's barrel jack connector (J25). (See [these instructions](https://www.jetsonhacks.com/2019/04/10/jetson-nano-use-more-power/) on how to enable barrel jack power.) This software makes full use of the GPU, so it will not run with USB power.
3. A USB webcam attached to your Nano
4. Another computer with a program that can display RTSP streams -- we suggest [VLC](https://www.videolan.org/vlc/index.html) or [QuickTime](https://www.apple.com/quicktime/download/).
First, the MaskCam container needs to be downloaded from Docker Hub. On your Nano, run:
```
# This will take 10 minutes or more to download
sudo docker pull maskcam/maskcam-beta
```
Find your local Jetson Nano IP address using `ifconfig`. This address will be used later to view a live video stream from the camera and to interact with the Nano from a web server.
Make sure a USB camera is connected to the Nano, then start MaskCam by running the following command, substituting `<your-jetson-ip>` with your Nano's IP address.
```
# Connect USB camera before running this!
sudo docker run --runtime nvidia --privileged --rm -it --env MASKCAM_DEVICE_ADDRESS=<your-jetson-ip> -p 1883:1883 -p 8080:8080 -p 8554:8554 maskcam/maskcam-beta
```
The MaskCam container should start running the `maskcam_run.py` script, using the USB camera as the default input device (`/dev/video0`). It will produce various status output messages (and error messages, if it encounters problems). If there are errors, the process will automatically end after several seconds. Check the [Troubleshooting](#troubleshooting-common-errors) section for tips on resolving errors.
Otherwise, after 30 seconds or so, it should continually generate status messages (such as `Processed 100 frames...`). Leave it running (don't press `Ctrl+C`, but be aware that the device will start heating up) and continue to the next section to visualize the video!
### Viewing the Live Video Stream
If you scroll through the logs and don't see any errors, you should find a message like:
```Streaming at rtsp://aaa.bbb.ccc.ddd:8554/maskcam```
where `aaa.bbb.ccc.ddd` is the address that you provided in `MASKCAM_DEVICE_ADDRESS` previously. If you didn't provide an address, you'll see some unknown address label there, but the streaming will still work.
You can copy-paste that URL into your RTSP streaming viewer ([see how](https://user-images.githubusercontent.com/12506292/111346333-e14d8800-865c-11eb-9242-0ffa4f50547f.mp4) to do it with VLC) on another computer. If all goes well, you should be rewarded with streaming video from your Nano, with green boxes around faces wearing masks and red boxes around faces not wearing masks. An example video of the live streaming in action is shown below.
This video stream gives a general demonstration of how MaskCam works. However, MaskCam also has other features, such as the ability to send mask detection statistics to the cloud and view them through a web browser. If you'd like to see these features in action, you'll need to set up an MQTT server, which is covered in the [MQTT Server Setup section](#mqtt-and-web-server-setup).
If you encounter any errors running the live stream, check the [Troubleshooting](#troubleshooting-common-errors) section for tips on resolving errors.
### Setting Device Configuration Parameters
MaskCam uses environment variables to configure parameters without having to rebuild the container or manually change the configuration file each time the program is run. For example, in the previous section we set the `MASKCAM_DEVICE_ADDRESS` variable to indicate our Nano's IP address. A list of configurable parameters is shown in [maskcam_config.txt](maskcam_config.txt). The mapping between environment variable names and configuration parameters is defined in [maskcam/config.py](maskcam/config.py).
This section shows how to set environment variables to change configuration parameters. For example, if you want to use the `/dev/video1` camera device rather than `/dev/video0`, you can define `MASKCAM_INPUT` when running the container:
```
# Run with MASKCAM_INPUT and MASKCAM_DEVICE_ADDRESS
sudo docker run --runtime nvidia --privileged --rm -it --env MASKCAM_INPUT=v4l2:///dev/video1 --env MASKCAM_DEVICE_ADDRESS=<your-jetson-ip> -p 1883:1883 -p 8080:8080 -p 8554:8554 maskcam/maskcam-beta
```
Another useful input device that you might want to use is a CSI camera (like the Raspberry Pi camera), and in that case you need to set `MASKCAM_INPUT=argus://0` instead of the value shown above.
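For example, a run command using a CSI camera would look like the following sketch (same flags as before, only the input changes):
```
# Run with a CSI camera (e.g., Raspberry Pi camera) as input
sudo docker run --runtime nvidia --privileged --rm -it --env MASKCAM_INPUT=argus://0 --env MASKCAM_DEVICE_ADDRESS=<your-jetson-ip> -p 1883:1883 -p 8080:8080 -p 8554:8554 maskcam/maskcam-beta
```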
As another example, if you have already set up our MQTT and web server (as shown in the [MQTT Server Setup section](#mqtt-and-web-server-setup)), you need to define
two additional environment variables, `MQTT_BROKER_IP` and `MQTT_DEVICE_NAME`. This allows your device to find the MQTT server and identify itself:
```
# Run with MQTT_BROKER_IP, MQTT_DEVICE_NAME, and MASKCAM_DEVICE_ADDRESS
sudo docker run --runtime nvidia --privileged --rm -it --env MQTT_BROKER_IP=<server-ip> --env MQTT_DEVICE_NAME=<device-name> --env MASKCAM_DEVICE_ADDRESS=<your-jetson-ip> -p 1883:1883 -p 8080:8080 -p 8554:8554 maskcam/maskcam-beta
```
*If you have too many `--env` variables to add, it might be easier to create a [.env file](https://docs.docker.com/compose/env-file/) and point to it using the `--env-file` flag instead.*
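For instance, a minimal sketch of such a file (the filename `maskcam.env` is arbitrary) could look like:
```
# maskcam.env -- one VARIABLE=value per line, no quotes needed
MQTT_BROKER_IP=<server-ip>
MQTT_DEVICE_NAME=my-jetson-1
MASKCAM_DEVICE_ADDRESS=<your-jetson-ip>
```
Then run the container with `--env-file maskcam.env` in place of the individual `--env` flags.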
## MQTT and Web Server Setup
### Running the MQTT Broker and Web Server
MaskCam is intended to be set up with a web server that stores mask detection statistics and allows users to remotely interact with the device. We wrote code for instantiating a [server](server/) that receives statistics from the device, stores them in a database, and has a web-based GUI frontend to display them. A screenshot of the frontend for an example device is shown below.
You can test out and explore this functionality by starting the server on a PC on your local network and pointing your Jetson Nano MaskCam device to it. This section gives instructions on how to do so. The MQTT broker and web server can be built and run on a Linux or OSX machine; we've tested it on Ubuntu 18.04 LTS and OSX Big Sur. It can also be set up in an online AWS EC2 instance if you want to access it from outside of your local network.
The server consists of several docker containers that run together using [docker-compose](https://docs.docker.com/compose/install/). Install docker-compose on your machine by following the [installation instructions for your platform](https://docs.docker.com/compose/install/) before continuing. All other necessary packages and libraries will be automatically installed when you set up the containers in the next steps.
After installing docker-compose, clone this repo:
```
git clone https://github.com/bdtinc/maskcam.git
```
Go to the `server/` folder, which has all the needed components implemented as four containers: the Mosquitto broker, backend API, database, and Streamlit frontend.
These containers are configured using environment variables, so create the `.env` files by copying the default templates:
```
cd server
cp database.env.template database.env
cp frontend.env.template frontend.env
cp backend.env.template backend.env
```
The only file that needs to be changed is `database.env`. Open it with a text editor and replace the `<user>`, `<password>`, and `<database-name>` fields with your own values. Here are some example values, but you should be more creative for security reasons:
```
POSTGRES_USER=postgres
POSTGRES_PASSWORD=some_password
POSTGRES_DB=maskcam
```
*NOTE:* If you want to change any of the `database.env` values after building the containers, the easiest thing to do is to delete the `pgdata` volume by running `docker volume rm pgdata`. Be aware that this also deletes all stored database information and statistics.
After editing the database environment file, you're ready to build all the containers and run them with a single command:
```
sudo docker-compose up -d
```
Wait a couple minutes after issuing the command to make sure that all containers are built and running. Then, check the local IP of your computer by running the `ifconfig` command. (It should be an address that starts with `192.168...`, `10...` or `172...`.) This is the server IP that will be used for connecting to the server (since the server is hosted on this computer).
Next, open a web browser and enter the server IP to visit the frontend webpage:
```
http://<server-ip>:8501/
```
If you see a `ConnectionError` in the frontend, wait a couple more seconds and reload the page. The backend container can take some time to finish the database setup.
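While you wait, you can check the containers' status and logs with standard docker-compose commands (the service name `backend` below is an assumption; check [server/docker-compose.yml](server/docker-compose.yml) for the actual service names):
```
# Check that all four containers are up
sudo docker-compose ps
# Follow the logs of a specific service, e.g. the backend
sudo docker-compose logs -f backend
```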
*NOTE:* If you're setting the server up on a remote instance like an AWS EC2, make sure you have ports `1883` (MQTT) and `8501` (web frontend) open for inbound and outbound traffic.
### Setup a Device With Your Server
Once you've got the server set up on a local machine (or on an AWS EC2 instance with a public IP), switch back to the Jetson Nano device. Run the MaskCam container using the following command, where `MQTT_BROKER_IP` is set to the IP of your server. (If you're using an AWS EC2 server, make sure to configure port `1883` for inbound and outbound traffic before running this command.)
```
# Run with MQTT_BROKER_IP, MQTT_DEVICE_NAME, and MASKCAM_DEVICE_ADDRESS
sudo docker run --runtime nvidia --privileged --rm -it --env MQTT_BROKER_IP=<server-ip> --env MQTT_DEVICE_NAME=my-jetson-1 --env MASKCAM_DEVICE_ADDRESS=<your-jetson-ip> -p 1883:1883 -p 8080:8080 -p 8554:8554 maskcam/maskcam-beta
```
And that's it. If the device can reach the server's IP, you should see some successful connection messages in the output logs, and your device should appear in the drop-down menu of the frontend (reload the page if you don't see it). In the frontend, select `Group data by: Second` and hit `Refresh status` to see how the plot changes when new data arrives.
Check the next section if the MQTT connection is not established from the device to the server.
### Checking MQTT Connection
If you're running the MQTT broker on a machine in your local network, make sure its IP is accessible from the Jetson device:
```
ping <server-ip>
```
*NOTE:* Remember to use the network address of the computer you set up the server on, which you can check using the `ifconfig` command and looking for an address that starts with `192.168...`, `10...` or `172...`.
If you're setting up a remote server and using its public IP to connect
from your device, chances are you haven't properly opened port `1883` for inbound and outbound traffic.
To check that the port is correctly configured, use `nc` from a local machine or your Jetson:
```
nc -vz <server-ip> 1883
```
Remember you also need to open port `8501` to access the web server frontend from a web browser, as explained in the [server configuration section](#running-the-mqtt-broker-and-web-server) (but that's not relevant for the MQTT communication with the device).
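If the port is reachable but you still don't see data arriving, one quick way to watch the broker's traffic is subscribing to all topics with `mosquitto_sub` (from the `mosquitto-clients` package); this is a generic sketch, not a MaskCam-specific command:
```
# Print every message published to the broker, with topic names
mosquitto_sub -h <server-ip> -p 1883 -t '#' -v
```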
## Working With the MaskCam Container
### Development Mode: Manually Running MaskCam
If you want to play around with the code, you probably don't want the container to automatically start running the `maskcam_run.py` script.
The easiest way to achieve that is by defining the environment variable `DEV_MODE=1`:
```
docker run --runtime nvidia --privileged --rm -it --env DEV_MODE=1 -p 1883:1883 -p 8080:8080 -p 8554:8554 maskcam/maskcam-beta
```
This will cause the container to start a `/bin/bash` prompt (see [docker/start.sh](docker/start.sh) for details), from which you could run the script manually, or any
of its sub-modules as standalone processes:
```
# e.g: Run with a different input instead of default `/dev/video0`
./maskcam_run.py v4l2:///dev/video1
# e.g: Disable tracker to visualize raw detections and scores
MASKCAM_DISABLE_TRACKER=1 ./maskcam_run.py
```
### Debugging: Running MaskCam Modules as Standalone Processes
The script `maskcam_run.py`, which is the main entrypoint for the MaskCam software, has two roles:
- Handles all the MQTT communication (send stats and receive commands)
- Orchestrates all other processes that live under `maskcam/maskcam_*.py`.
But you can actually run any of those modules as standalone processes, which can be easier for debugging.
You need to set `DEV_MODE=1` as explained in the previous section to access the container prompt, and then you can run the python modules:
```
# e.g: Run only the static file server process
python3 -m maskcam.maskcam_fileserver
# e.g: Serve another directory to test
python3 -m maskcam.maskcam_fileserver /tmp
# e.g: Run only the inference and streaming processes
python3 -m maskcam.maskcam_streaming &
# Hit enter until you get a prompt and then:
python3 -m maskcam.maskcam_inference
```
**Note:** In the last example, `maskcam_streaming` is running in the background,
so it will not terminate if you press `Ctrl+C` (only `maskcam_inference` will,
since it's running in the foreground).
To check that the streaming is still running and then bring it to foreground to terminate it, run:
```
jobs
fg %1
# Now you can hit Ctrl+C to terminate streaming
```
## Additional Information
Further information about working with and customizing MaskCam is provided on separate pages in the [docs](docs) folder. This section gives a brief description and link to each page.
### Running on Jetson Nano Developer Kit Using BalenaOS
[BalenaOS](https://www.balena.io/os/) is a lightweight operating system designed for running containers on embedded devices. It provides several advantages for fleet deployment and management, especially when combined with balena's balenaCloud management system. If you'd like to try running MaskCam with balenaOS instead of JetPack OS on your Jetson Nano, please follow the instructions at [BalenaOS-DevKit-Nano-Setup.md](docs/BalenaOS-DevKit-Nano-Setup.md).
### Custom Container Development
MaskCam is intended to be a reference design for any connected smart camera application. You can create your own application by starting from our pre-built container, modifying it to add the code files and packages needed for your program, and then re-building the container. The [Custom-Container-Development.md](docs/Custom-Container-Development.md) page gives instructions on how to build your own container based on MaskCam.
#### Building From Source on Jetson Nano Developer Kit
Please see [How to Build your Own Container from Source on the Jetson Nano](https://github.com/bdtinc/maskcam/blob/main/docs/Custom-Container-Development.md#how-to-build-your-own-container-from-source-on-the-jetson-nano) for instructions on how to build a custom MaskCam container on your Jetson Nano Developer Kit.
#### Using Your Own Detection Model
Please see [How to Use Your Own Detection Model](https://github.com/bdtinc/maskcam/blob/main/docs/Custom-Container-Development.md#how-to-use-your-own-detection-model) for instructions on how to use your own detection model rather than our mask detection model.
### Installing MaskCam Manually (Without a Container)
MaskCam can also be installed manually, rather than by downloading our pre-built container. Using a manual installation of MaskCam can help with development if you'd prefer not to work with containers. If you'd like to install MaskCam without using containers, please see [docs/Manual-Dependencies-Installation.md](docs/Manual-Dependencies-Installation.md).
### Running on Jetson Nano with Photon Carrier Board
For our hardware prototype of MaskCam, we used a Jetson Nano module and a [Connect Tech Photon carrier board](https://connecttech.com/product/photon-jetson-nano-ai-camera-platform/), rather than the Jetson Nano Developer Kit. We used the Photon because the Developer Kit is not sold or warrantied for production use. Using the Photon allowed us to quickly create a production-ready prototype using off-the-shelf hardware. If you have a Photon carrier board and Jetson Nano module, you can install MaskCam on them by using the setup instructions at [docs/BalenaOS-Photon-Nano-Setup.md](docs/BalenaOS-Photon-Nano-Setup.md).
### Useful Development Scripts
During development, some scripts were produced which might be useful for other developers to debug or update the software. These include an MQTT sniffer, a script to run the TensorRT model on images, and a script to convert a model trained with the original Darknet YOLO implementation to TensorRT format. Basic usage for all these tools is covered in [docs/Useful-Development-Scripts.md](docs/Useful-Development-Scripts.md).
## Troubleshooting Common Errors
If you run into any errors or issues while working with MaskCam, this section gives common errors and their solutions.
MaskCam consists of many different processes running in parallel. As a consequence, when there's an error in one particular process, all of them are sent termination signals so the whole application finishes gracefully. This means that you need to scroll up through the output to find the original error that caused the failure. It should stand out as a red **ERROR** log entry, followed by the name of the process that failed and an error message.
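If the output is too long to scroll through, one way to find that entry is capturing the logs to a file (a sketch; adjust the run flags to your setup):
```
# Capture the full output to a file while still seeing it live
sudo docker run --runtime nvidia --privileged --rm -it maskcam/maskcam-beta 2>&1 | tee maskcam.log
# Then search for the original failure afterwards
grep -i error maskcam.log
```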
#### Error: camera not connected/not recognized
If you see an error containing the message `Cannot identify device '/dev/video0'`, among other Gst and v4l messages, it means the program couldn't find the camera device. Make sure your camera is connected to the Nano and recognized by the host Ubuntu OS by issuing `ls /dev` and checking if `/dev/video0` is present in the output.
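For a quick check from the host, something like the following confirms the camera is enumerated (`v4l2-ctl` is part of the `v4l-utils` package and may need to be installed separately):
```
# List video devices seen by the OS
ls /dev/video*
# If v4l-utils is installed, list cameras and their device nodes
v4l2-ctl --list-devices
```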
#### Error: not running in privileged mode
In this case, you'll see a bunch of annoying messages like:
```
Error: Can't initialize nvrm channel
Couldn't create ddkvic Session: Cannot allocate memory
nvbuf_utils: Could not create Default NvBufferSession
```
You'll probably see multiple failures in other MaskCam processes as well. To resolve these errors, make sure you're running docker with the `--privileged` flag, as described in the [first section](#running-maskcam-from-a-container-on-a-jetson-nano-developer-kit).
#### Error: reason not negotiated/camera capabilities
If you get an error that looks like `v4l-camera-source / reason not-negotiated`, the problem is that the USB camera you're using doesn't support the default `camera-framerate=30` (frames per second). If you don't have another camera, run the script [utils/gst_capabilities.sh](utils/gst_capabilities.sh) and find the lines with type `video/x-raw ...`.
Then find any supported `framerate=X/1` (with `X` being an integer like 24, 15, etc.) and set the corresponding configuration parameter with `--env MASKCAM_CAMERA_FRAMERATE=X` (see the [previous section](#setting-device-configuration-parameters)).
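As a concrete sketch, if `utils/gst_capabilities.sh` shows that your camera supports 15 fps, the run command becomes (other flags as in previous sections):
```
# Use a framerate the camera actually supports
sudo docker run --runtime nvidia --privileged --rm -it --env MASKCAM_CAMERA_FRAMERATE=15 --env MASKCAM_DEVICE_ADDRESS=<your-jetson-ip> -p 1883:1883 -p 8080:8080 -p 8554:8554 maskcam/maskcam-beta
```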
#### Error: Streaming or file server are not accessible (nothing else seems to fail)
Make sure you're mapping the right ports from the container, with the `-p host_port:container_port` parameters indicated in the previous sections. The default ports that should be exposed by the container are configured in [maskcam_config.txt](maskcam_config.txt) as:
```
fileserver-port=8080
streaming-port=8554
mqtt-broker-port=1883
```
These port mappings are why we use `docker run ... -p 1883:1883 -p 8080:8080 -p 8554:8554 ...` with the run command. Remember that all of these ports can be overridden using environment variables, as described in the [previous section](#setting-device-configuration-parameters). Other ports like `udp-port-*` are not intended to be accessible from outside the container; they are only used for communication between the inference process and the streaming and file-saving processes.
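For example, to expose the file server on a different host port, you can either remap it in docker (the left side of `-p` is the host port), or also override the port inside the container via its environment variable; the variable name `MASKCAM_FILESERVER_PORT` below is an assumption, check [maskcam/config.py](maskcam/config.py) for the actual mapping:
```
# Remap only on the host side: host port 9090 -> container port 8080
sudo docker run --runtime nvidia --privileged --rm -it -p 9090:8080 -p 1883:1883 -p 8554:8554 maskcam/maskcam-beta
# Or override the container port too (hypothetical variable name, see maskcam/config.py)
sudo docker run --runtime nvidia --privileged --rm -it --env MASKCAM_FILESERVER_PORT=9090 -p 9090:9090 -p 1883:1883 -p 8554:8554 maskcam/maskcam-beta
```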
#### Other Errors
Sometimes, after restarting the process or the whole docker container many times, some GPU resources can get stuck and cause unexpected errors. If that's the case, try rebooting the device and running the container again. If you find that the container fails consistently after a particular sequence of steps, please don't hesitate to [report an Issue](https://github.com/bdtinc/maskcam/issues) with the relevant context and we'll try to reproduce and fix it.
## Questions? Need Help?
Email us at maskcam@bdti.com, and be sure to check out our [independent report on the development of MaskCam](https://bdti.com/maskcam)!
================================================
FILE: deepstream_plugin_yolov4/Makefile
================================================
################################################################################
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
################################################################################
CUDA_VER?=
ifeq ($(CUDA_VER),)
$(error "CUDA_VER is not set")
endif
CC:= g++
NVCC:=/usr/local/cuda-$(CUDA_VER)/bin/nvcc
CFLAGS:= -Wall -std=c++11 -shared -fPIC -Wno-error=deprecated-declarations
CFLAGS+= -I../../includes -I/usr/local/cuda-$(CUDA_VER)/include -I/opt/nvidia/deepstream/deepstream-5.0/sources/includes
LIBS:= -lnvinfer_plugin -lnvinfer -lnvparsers -L/usr/local/cuda-$(CUDA_VER)/lib64 -lcudart -lcublas -lstdc++fs -L/opt/nvidia/deepstream/deepstream-5.0/lib
LFLAGS:= -shared -Wl,--start-group $(LIBS) -Wl,--end-group
INCS:= $(wildcard *.h)
SRCFILES:= nvdsinfer_yolo_engine.cpp \
nvdsparsebbox_Yolo.cpp \
yoloPlugins.cpp \
trt_utils.cpp \
yolo.cpp \
kernels.cu
TARGET_LIB:= libnvdsinfer_custom_impl_Yolo.so
TARGET_OBJS:= $(SRCFILES:.cpp=.o)
TARGET_OBJS:= $(TARGET_OBJS:.cu=.o)
all: $(TARGET_LIB)
%.o: %.cpp $(INCS) Makefile
$(CC) -c -o $@ $(CFLAGS) $<
%.o: %.cu $(INCS) Makefile
$(NVCC) -c -o $@ --compiler-options '-fPIC' $<
$(TARGET_LIB) : $(TARGET_OBJS)
$(CC) -o $@ $(TARGET_OBJS) $(LFLAGS)
clean:
rm -rf $(TARGET_LIB)
================================================
FILE: deepstream_plugin_yolov4/README.md
================================================
## YOLOv4 plugin for DeepStream
This plugin was obtained from: https://github.com/Tianxiaomo/pytorch-YOLOv4/tree/master/DeepStream
It must be compiled locally on the Jetson device:
```
export CUDA_VER=10.2
make
```
================================================
FILE: deepstream_plugin_yolov4/kernels.cu
================================================
/*
* Copyright (c) 2018-2019 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA Corporation is strictly prohibited.
*
*/
#include <cstdint>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
#include <sys/types.h>
inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); }
__global__ void gpuYoloLayerV3(const float* input, float* output, const uint gridSize, const uint numOutputClasses,
const uint numBBoxes)
{
uint x_id = blockIdx.x * blockDim.x + threadIdx.x;
uint y_id = blockIdx.y * blockDim.y + threadIdx.y;
uint z_id = blockIdx.z * blockDim.z + threadIdx.z;
if ((x_id >= gridSize) || (y_id >= gridSize) || (z_id >= numBBoxes))
{
return;
}
const int numGridCells = gridSize * gridSize;
const int bbindex = y_id * gridSize + x_id;
output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]
= sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]);
output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]
= sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]);
output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]
= __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]);
output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]
= __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]);
output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]
= sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]);
for (uint i = 0; i < numOutputClasses; ++i)
{
output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]
= sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]);
}
}
cudaError_t cudaYoloLayerV3(const void* input, void* output, const uint& batchSize, const uint& gridSize,
const uint& numOutputClasses, const uint& numBBoxes,
uint64_t outputSize, cudaStream_t stream);
cudaError_t cudaYoloLayerV3(const void* input, void* output, const uint& batchSize, const uint& gridSize,
const uint& numOutputClasses, const uint& numBBoxes,
uint64_t outputSize, cudaStream_t stream)
{
dim3 threads_per_block(16, 16, 4);
dim3 number_of_blocks((gridSize / threads_per_block.x) + 1,
(gridSize / threads_per_block.y) + 1,
(numBBoxes / threads_per_block.z) + 1);
for (unsigned int batch = 0; batch < batchSize; ++batch)
{
gpuYoloLayerV3<<<number_of_blocks, threads_per_block, 0, stream>>>(
reinterpret_cast<const float *>(input) + (batch * outputSize),
reinterpret_cast<float *>(output) + (batch * outputSize), gridSize, numOutputClasses,
numBBoxes);
}
return cudaGetLastError();
}
================================================
FILE: deepstream_plugin_yolov4/nvdsinfer_yolo_engine.cpp
================================================
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "nvdsinfer_custom_impl.h"
#include "nvdsinfer_context.h"
#include "yoloPlugins.h"
#include "yolo.h"
#include <algorithm>
#define USE_CUDA_ENGINE_GET_API 1
static bool getYoloNetworkInfo (NetworkInfo &networkInfo, const NvDsInferContextInitParams* initParams)
{
std::string yoloCfg = initParams->customNetworkConfigFilePath;
std::string yoloType;
std::transform (yoloCfg.begin(), yoloCfg.end(), yoloCfg.begin(), [] (uint8_t c) {
return std::tolower (c);});
if (yoloCfg.find("yolov2") != std::string::npos) {
if (yoloCfg.find("yolov2-tiny") != std::string::npos)
yoloType = "yolov2-tiny";
else
yoloType = "yolov2";
} else if (yoloCfg.find("yolov3") != std::string::npos) {
if (yoloCfg.find("yolov3-tiny") != std::string::npos)
yoloType = "yolov3-tiny";
else
yoloType = "yolov3";
} else {
std::cerr << "Yolo type is not defined from config file name:"
<< yoloCfg << std::endl;
return false;
}
networkInfo.networkType = yoloType;
networkInfo.configFilePath = initParams->customNetworkConfigFilePath;
networkInfo.wtsFilePath = initParams->modelFilePath;
networkInfo.deviceType = (initParams->useDLA ? "kDLA" : "kGPU");
networkInfo.inputBlobName = "data";
if (networkInfo.configFilePath.empty() ||
networkInfo.wtsFilePath.empty()) {
std::cerr << "Yolo config file or weights file is NOT specified."
<< std::endl;
return false;
}
if (!fileExists(networkInfo.configFilePath) ||
!fileExists(networkInfo.wtsFilePath)) {
std::cerr << "Yolo config file or weights file is NOT exist."
<< std::endl;
return false;
}
return true;
}
#if !USE_CUDA_ENGINE_GET_API
IModelParser* NvDsInferCreateModelParser(
const NvDsInferContextInitParams* initParams) {
NetworkInfo networkInfo;
if (!getYoloNetworkInfo(networkInfo, initParams)) {
return nullptr;
}
return new Yolo(networkInfo);
}
#else
extern "C"
bool NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder * const builder,
const NvDsInferContextInitParams * const initParams,
nvinfer1::DataType dataType,
nvinfer1::ICudaEngine *& cudaEngine);
extern "C"
bool NvDsInferYoloCudaEngineGet(nvinfer1::IBuilder * const builder,
const NvDsInferContextInitParams * const initParams,
nvinfer1::DataType dataType,
nvinfer1::ICudaEngine *& cudaEngine)
{
NetworkInfo networkInfo;
if (!getYoloNetworkInfo(networkInfo, initParams)) {
return false;
}
Yolo yolo(networkInfo);
cudaEngine = yolo.createEngine (builder);
if (cudaEngine == nullptr)
{
std::cerr << "Failed to build cuda engine on "
<< networkInfo.configFilePath << std::endl;
return false;
}
return true;
}
#endif
================================================
FILE: deepstream_plugin_yolov4/nvdsparsebbox_Yolo.cpp
================================================
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstring>
#include <fstream>
#include <iostream>
#include <unordered_map>
#include "nvdsinfer_custom_impl.h"
#include "trt_utils.h"
static const int NUM_CLASSES_YOLO = 4; // Just used for sanity checks below; keep updated by hand
extern "C" bool NvDsInferParseCustomYoloV3(
std::vector const &outputLayersInfo,
NvDsInferNetworkInfo const &networkInfo,
NvDsInferParseDetectionParams const &detectionParams,
std::vector &objectList);
extern "C" bool NvDsInferParseCustomYoloV3Tiny(
std::vector const &outputLayersInfo,
NvDsInferNetworkInfo const &networkInfo,
NvDsInferParseDetectionParams const &detectionParams,
std::vector &objectList);
extern "C" bool NvDsInferParseCustomYoloV2(
std::vector const &outputLayersInfo,
NvDsInferNetworkInfo const &networkInfo,
NvDsInferParseDetectionParams const &detectionParams,
std::vector &objectList);
extern "C" bool NvDsInferParseCustomYoloV2Tiny(
std::vector const &outputLayersInfo,
NvDsInferNetworkInfo const &networkInfo,
NvDsInferParseDetectionParams const &detectionParams,
std::vector &objectList);
extern "C" bool NvDsInferParseCustomYoloTLT(
std::vector const &outputLayersInfo,
NvDsInferNetworkInfo const &networkInfo,
NvDsInferParseDetectionParams const &detectionParams,
std::vector &objectList);
extern "C" bool NvDsInferParseCustomYoloV4(
std::vector const &outputLayersInfo,
NvDsInferNetworkInfo const &networkInfo,
NvDsInferParseDetectionParams const &detectionParams,
std::vector &objectList);
/* This is a sample bounding box parsing function for the sample YoloV3 detector model */
static NvDsInferParseObjectInfo convertBBox(const float &bx, const float &by, const float &bw,
const float &bh, const int &stride, const uint &netW,
const uint &netH)
{
NvDsInferParseObjectInfo b;
// Restore coordinates to network input resolution
float xCenter = bx * stride;
float yCenter = by * stride;
float x0 = xCenter - bw / 2;
float y0 = yCenter - bh / 2;
float x1 = x0 + bw;
float y1 = y0 + bh;
x0 = clamp(x0, 0, netW);
y0 = clamp(y0, 0, netH);
x1 = clamp(x1, 0, netW);
y1 = clamp(y1, 0, netH);
b.left = x0;
b.width = clamp(x1 - x0, 0, netW);
b.top = y0;
b.height = clamp(y1 - y0, 0, netH);
return b;
}
static void addBBoxProposal(const float bx, const float by, const float bw, const float bh,
const uint stride, const uint &netW, const uint &netH, const int maxIndex,
const float maxProb, std::vector<NvDsInferParseObjectInfo> &binfo)
{
NvDsInferParseObjectInfo bbi = convertBBox(bx, by, bw, bh, stride, netW, netH);
if (bbi.width < 1 || bbi.height < 1)
return;
bbi.detectionConfidence = maxProb;
bbi.classId = maxIndex;
binfo.push_back(bbi);
}
static std::vector<NvDsInferParseObjectInfo>
decodeYoloV2Tensor(
const float *detections, const std::vector<float> &anchors,
const uint gridSizeW, const uint gridSizeH, const uint stride, const uint numBBoxes,
const uint numOutputClasses, const uint &netW,
const uint &netH)
{
std::vector<NvDsInferParseObjectInfo> binfo;
for (uint y = 0; y < gridSizeH; ++y)
{
for (uint x = 0; x < gridSizeW; ++x)
{
for (uint b = 0; b < numBBoxes; ++b)
{
const float pw = anchors[b * 2];
const float ph = anchors[b * 2 + 1];
const int numGridCells = gridSizeH * gridSizeW;
const int bbindex = y * gridSizeW + x;
const float bx = x + detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 0)];
const float by = y + detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 1)];
const float bw = pw * exp(detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 2)]);
const float bh = ph * exp(detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 3)]);
const float objectness = detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 4)];
float maxProb = 0.0f;
int maxIndex = -1;
for (uint i = 0; i < numOutputClasses; ++i)
{
float prob = (detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + (5 + i))]);
if (prob > maxProb)
{
maxProb = prob;
maxIndex = i;
}
}
maxProb = objectness * maxProb;
addBBoxProposal(bx, by, bw, bh, stride, netW, netH, maxIndex, maxProb, binfo);
}
}
}
return binfo;
}
static std::vector<NvDsInferParseObjectInfo>
decodeYoloV3Tensor(
const float *detections, const std::vector<int> &mask, const std::vector<float> &anchors,
const uint gridSizeW, const uint gridSizeH, const uint stride, const uint numBBoxes,
const uint numOutputClasses, const uint &netW,
const uint &netH)
{
std::vector<NvDsInferParseObjectInfo> binfo;
for (uint y = 0; y < gridSizeH; ++y)
{
for (uint x = 0; x < gridSizeW; ++x)
{
for (uint b = 0; b < numBBoxes; ++b)
{
const float pw = anchors[mask[b] * 2];
const float ph = anchors[mask[b] * 2 + 1];
const int numGridCells = gridSizeH * gridSizeW;
const int bbindex = y * gridSizeW + x;
const float bx = x + detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 0)];
const float by = y + detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 1)];
const float bw = pw * detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 2)];
const float bh = ph * detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 3)];
const float objectness = detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + 4)];
float maxProb = 0.0f;
int maxIndex = -1;
for (uint i = 0; i < numOutputClasses; ++i)
{
float prob = (detections[bbindex + numGridCells * (b * (5 + numOutputClasses) + (5 + i))]);
if (prob > maxProb)
{
maxProb = prob;
maxIndex = i;
}
}
maxProb = objectness * maxProb;
addBBoxProposal(bx, by, bw, bh, stride, netW, netH, maxIndex, maxProb, binfo);
}
}
}
return binfo;
}
static inline std::vector<const NvDsInferLayerInfo *>
SortLayers(const std::vector<NvDsInferLayerInfo> &outputLayersInfo)
{
std::vector<const NvDsInferLayerInfo *> outLayers;
for (auto const &layer : outputLayersInfo)
{
outLayers.push_back(&layer);
}
std::sort(outLayers.begin(), outLayers.end(),
[](const NvDsInferLayerInfo *a, const NvDsInferLayerInfo *b) {
return a->inferDims.d[1] < b->inferDims.d[1];
});
return outLayers;
}
static bool NvDsInferParseYoloV3(
std::vector<NvDsInferLayerInfo> const &outputLayersInfo,
NvDsInferNetworkInfo const &networkInfo,
NvDsInferParseDetectionParams const &detectionParams,
std::vector<NvDsInferParseObjectInfo> &objectList,
const std::vector<float> &anchors,
const std::vector<std::vector<int>> &masks)
{
const uint kNUM_BBOXES = 3;
const std::vector<const NvDsInferLayerInfo *> sortedLayers =
SortLayers(outputLayersInfo);
if (sortedLayers.size() != masks.size())
{
std::cerr << "ERROR: yoloV3 output layer.size: " << sortedLayers.size()
<< " does not match mask.size: " << masks.size() << std::endl;
return false;
}
if (NUM_CLASSES_YOLO != detectionParams.numClassesConfigured)
{
std::cerr << "WARNING: Num classes mismatch. Configured:"
<< detectionParams.numClassesConfigured
<< ", detected by network: " << NUM_CLASSES_YOLO << std::endl;
}
std::vector<NvDsInferParseObjectInfo> objects;
for (uint idx = 0; idx < masks.size(); ++idx)
{
const NvDsInferLayerInfo &layer = *sortedLayers[idx]; // 255 x Grid x Grid
assert(layer.inferDims.numDims == 3);
const uint gridSizeH = layer.inferDims.d[1];
const uint gridSizeW = layer.inferDims.d[2];
const uint stride = DIVUP(networkInfo.width, gridSizeW);
assert(stride == DIVUP(networkInfo.height, gridSizeH));
std::vector<NvDsInferParseObjectInfo> outObjs =
decodeYoloV3Tensor((const float *)(layer.buffer), masks[idx], anchors, gridSizeW, gridSizeH, stride, kNUM_BBOXES,
NUM_CLASSES_YOLO, networkInfo.width, networkInfo.height);
objects.insert(objects.end(), outObjs.begin(), outObjs.end());
}
objectList = objects;
return true;
}
static NvDsInferParseObjectInfo convertBBoxYoloV4(const float &bx1, const float &by1, const float &bx2,
const float &by2, const uint &netW, const uint &netH)
{
NvDsInferParseObjectInfo b;
// Restore coordinates to network input resolution
float x1 = bx1 * netW;
float y1 = by1 * netH;
float x2 = bx2 * netW;
float y2 = by2 * netH;
x1 = clamp(x1, 0, netW);
y1 = clamp(y1, 0, netH);
x2 = clamp(x2, 0, netW);
y2 = clamp(y2, 0, netH);
b.left = x1;
b.width = clamp(x2 - x1, 0, netW);
b.top = y1;
b.height = clamp(y2 - y1, 0, netH);
return b;
}
static void addBBoxProposalYoloV4(const float bx, const float by, const float bw, const float bh,
const uint &netW, const uint &netH, const int maxIndex,
const float maxProb, std::vector<NvDsInferParseObjectInfo> &binfo)
{
NvDsInferParseObjectInfo bbi = convertBBoxYoloV4(bx, by, bw, bh, netW, netH);
if (bbi.width < 1 || bbi.height < 1)
return;
bbi.detectionConfidence = maxProb;
bbi.classId = maxIndex;
binfo.push_back(bbi);
}
static std::vector<NvDsInferParseObjectInfo>
decodeYoloV4Tensor(
const float *boxes, const float *scores,
const uint num_bboxes, NvDsInferParseDetectionParams const &detectionParams,
const uint &netW, const uint &netH)
{
std::vector<NvDsInferParseObjectInfo> binfo;
uint bbox_location = 0;
uint score_location = 0;
for (uint b = 0; b < num_bboxes; ++b)
{
float bx1 = boxes[bbox_location];
float by1 = boxes[bbox_location + 1];
float bx2 = boxes[bbox_location + 2];
float by2 = boxes[bbox_location + 3];
float maxProb = 0.0f;
int maxIndex = -1;
for (uint c = 0; c < detectionParams.numClassesConfigured; ++c)
{
float prob = scores[score_location + c];
if (prob > maxProb)
{
maxProb = prob;
maxIndex = c;
}
}
if (maxProb > detectionParams.perClassPreclusterThreshold[maxIndex])
{
addBBoxProposalYoloV4(bx1, by1, bx2, by2, netW, netH, maxIndex, maxProb, binfo);
}
bbox_location += 4;
score_location += detectionParams.numClassesConfigured;
}
return binfo;
}
/* C-linkage to prevent name-mangling */
static bool NvDsInferParseYoloV4(
std::vector<NvDsInferLayerInfo> const &outputLayersInfo,
NvDsInferNetworkInfo const &networkInfo,
NvDsInferParseDetectionParams const &detectionParams,
std::vector<NvDsInferParseObjectInfo> &objectList)
{
if (NUM_CLASSES_YOLO != detectionParams.numClassesConfigured)
{
std::cerr << "WARNING: Num classes mismatch. Configured:"
<< detectionParams.numClassesConfigured
<< ", detected by network: " << NUM_CLASSES_YOLO << std::endl;
}
std::vector<NvDsInferParseObjectInfo> objects;
const NvDsInferLayerInfo &boxes = outputLayersInfo[0]; // num_boxes x 4
const NvDsInferLayerInfo &scores = outputLayersInfo[1]; // num_boxes x num_classes
const NvDsInferLayerInfo &subbox = outputLayersInfo[2];
// printf("%d\n", subbox.inferDims.numDims);
// 3 dimensional: [num_boxes, 1, 4]
assert(boxes.inferDims.numDims == 3);
// 2 dimensional: [num_boxes, num_classes]
assert(scores.inferDims.numDims == 2);
// The second dimension should be num_classes
assert(detectionParams.numClassesConfigured == scores.inferDims.d[1]);
uint num_bboxes = boxes.inferDims.d[0];
// std::cout << "Network Info: " << networkInfo.height << " " << networkInfo.width << std::endl;
std::vector<NvDsInferParseObjectInfo> outObjs =
decodeYoloV4Tensor(
(const float *)(boxes.buffer), (const float *)(scores.buffer), num_bboxes, detectionParams,
networkInfo.width, networkInfo.height);
objects.insert(objects.end(), outObjs.begin(), outObjs.end());
objectList = objects;
return true;
}
extern "C" bool NvDsInferParseCustomYoloV4(
std::vector const &outputLayersInfo,
NvDsInferNetworkInfo const &networkInfo,
NvDsInferParseDetectionParams const &detectionParams,
std::vector &objectList)
{
return NvDsInferParseYoloV4(
outputLayersInfo, networkInfo, detectionParams, objectList);
}
extern "C" bool NvDsInferParseCustomYoloV3(
std::vector<NvDsInferLayerInfo> const &outputLayersInfo,
NvDsInferNetworkInfo const &networkInfo,
NvDsInferParseDetectionParams const &detectionParams,
std::vector<NvDsInferParseObjectInfo> &objectList)
{
static const std::vector<float> kANCHORS = {
10.0, 13.0, 16.0, 30.0, 33.0, 23.0, 30.0, 61.0, 62.0,
45.0, 59.0, 119.0, 116.0, 90.0, 156.0, 198.0, 373.0, 326.0};
static const std::vector<std::vector<int>> kMASKS = {
{6, 7, 8},
{3, 4, 5},
{0, 1, 2}};
return NvDsInferParseYoloV3(
outputLayersInfo, networkInfo, detectionParams, objectList,
kANCHORS, kMASKS);
}
extern "C" bool NvDsInferParseCustomYoloV3Tiny(
std::vector<NvDsInferLayerInfo> const &outputLayersInfo,
NvDsInferNetworkInfo const &networkInfo,
NvDsInferParseDetectionParams const &detectionParams,
std::vector<NvDsInferParseObjectInfo> &objectList)
{
static const std::vector<float> kANCHORS = {
10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319};
static const std::vector<std::vector<int>> kMASKS = {
{3, 4, 5},
// {0, 1, 2} is the darknet default; per observed outputs, {1, 2, 3} is correct here
{1, 2, 3}};
return NvDsInferParseYoloV3(
outputLayersInfo, networkInfo, detectionParams, objectList,
kANCHORS, kMASKS);
}
static bool NvDsInferParseYoloV2(
std::vector<NvDsInferLayerInfo> const &outputLayersInfo,
NvDsInferNetworkInfo const &networkInfo,
NvDsInferParseDetectionParams const &detectionParams,
std::vector<NvDsInferParseObjectInfo> &objectList)
{
// copy anchor data from yolov2.cfg file
std::vector<float> anchors = {0.57273, 0.677385, 1.87446, 2.06253, 3.33843,
5.47434, 7.88282, 3.52778, 9.77052, 9.16828};
const uint kNUM_BBOXES = 5;
if (outputLayersInfo.empty())
{
std::cerr << "Could not find output layer in bbox parsing" << std::endl;
;
return false;
}
const NvDsInferLayerInfo &layer = outputLayersInfo[0];
if (NUM_CLASSES_YOLO != detectionParams.numClassesConfigured)
{
std::cerr << "WARNING: Num classes mismatch. Configured:"
<< detectionParams.numClassesConfigured
<< ", detected by network: " << NUM_CLASSES_YOLO << std::endl;
}
assert(layer.inferDims.numDims == 3);
const uint gridSizeH = layer.inferDims.d[1];
const uint gridSizeW = layer.inferDims.d[2];
const uint stride = DIVUP(networkInfo.width, gridSizeW);
assert(stride == DIVUP(networkInfo.height, gridSizeH));
for (auto &anchor : anchors)
{
anchor *= stride;
}
std::vector<NvDsInferParseObjectInfo> objects =
decodeYoloV2Tensor((const float *)(layer.buffer), anchors, gridSizeW, gridSizeH, stride, kNUM_BBOXES,
NUM_CLASSES_YOLO, networkInfo.width, networkInfo.height);
objectList = objects;
return true;
}
extern "C" bool NvDsInferParseCustomYoloV2(
std::vector<NvDsInferLayerInfo> const &outputLayersInfo,
NvDsInferNetworkInfo const &networkInfo,
NvDsInferParseDetectionParams const &detectionParams,
std::vector<NvDsInferParseObjectInfo> &objectList)
{
return NvDsInferParseYoloV2(
outputLayersInfo, networkInfo, detectionParams, objectList);
}
extern "C" bool NvDsInferParseCustomYoloV2Tiny(
std::vector<NvDsInferLayerInfo> const &outputLayersInfo,
NvDsInferNetworkInfo const &networkInfo,
NvDsInferParseDetectionParams const &detectionParams,
std::vector<NvDsInferParseObjectInfo> &objectList)
{
return NvDsInferParseYoloV2(
outputLayersInfo, networkInfo, detectionParams, objectList);
}
extern "C" bool NvDsInferParseCustomYoloTLT(
std::vector<NvDsInferLayerInfo> const &outputLayersInfo,
NvDsInferNetworkInfo const &networkInfo,
NvDsInferParseDetectionParams const &detectionParams,
std::vector<NvDsInferParseObjectInfo> &objectList)
{
if (outputLayersInfo.size() != 4)
{
std::cerr << "Mismatch in the number of output buffers."
<< "Expected 4 output buffers, detected in the network :"
<< outputLayersInfo.size() << std::endl;
return false;
}
const int topK = 200;
const int *keepCount = static_cast<const int *>(outputLayersInfo.at(0).buffer);
const float *boxes = static_cast<const float *>(outputLayersInfo.at(1).buffer);
const float *scores = static_cast<const float *>(outputLayersInfo.at(2).buffer);
const float *cls = static_cast<const float *>(outputLayersInfo.at(3).buffer);
for (int i = 0; (i < keepCount[0]) && (static_cast<int>(objectList.size()) < topK); ++i)
{
const float *loc = &boxes[0] + (i * 4);
const float *conf = &scores[0] + i;
const float *cls_id = &cls[0] + i;
if (conf[0] > 1.001)
continue;
if ((loc[0] < 0) || (loc[1] < 0) || (loc[2] < 0) || (loc[3] < 0))
continue;
if ((loc[0] > networkInfo.width) || (loc[2] > networkInfo.width) || (loc[1] > networkInfo.height) || (loc[3] > networkInfo.height))
continue;
if ((loc[2] < loc[0]) || (loc[3] < loc[1]))
continue;
if (((loc[3] - loc[1]) > networkInfo.height) || ((loc[2] - loc[0]) > networkInfo.width))
continue;
NvDsInferParseObjectInfo curObj{static_cast<unsigned int>(cls_id[0]),
loc[0], loc[1], (loc[2] - loc[0]),
(loc[3] - loc[1]), conf[0]};
objectList.push_back(curObj);
}
return true;
}
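// Editor's note (inferred from the checks above, hedged): this TLT parser expects
// the four buffers emitted by a TensorRT batched-NMS style plugin --
// [0] keepCount (int), [1] boxes (keepCount x 4, pixel coordinates),
// [2] scores, [3] class ids -- which is why it only validates and copies
// detections instead of decoding raw grid tensors like the parsers above.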
/* Check that the custom function has been defined correctly */
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomYoloV4);
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomYoloV3);
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomYoloV3Tiny);
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomYoloV2);
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomYoloV2Tiny);
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomYoloTLT);
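// Usage sketch (editorial assumption, not part of the original file): DeepStream's
// nvinfer element picks these symbols up from the compiled .so via its model
// config, along the lines of:
//   [property]
//   parse-bbox-func-name=NvDsInferParseCustomYoloV4
//   custom-lib-path=<path to the plugin library built from this directory>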
================================================
FILE: deepstream_plugin_yolov4/trt_utils.cpp
================================================
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "trt_utils.h"
#include <experimental/filesystem>
#include <fstream>
#include <iostream>
#include <iomanip>
#include <sstream>
#include <algorithm>
#include <math.h>
#include "NvInferPlugin.h"
static void leftTrim(std::string& s)
{
s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int ch) { return !isspace(ch); }));
}
static void rightTrim(std::string& s)
{
s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !isspace(ch); }).base(), s.end());
}
std::string trim(std::string s)
{
leftTrim(s);
rightTrim(s);
return s;
}
float clamp(const float val, const float minVal, const float maxVal)
{
assert(minVal <= maxVal);
return std::min(maxVal, std::max(minVal, val));
}
bool fileExists(const std::string fileName, bool verbose)
{
if (!std::experimental::filesystem::exists(std::experimental::filesystem::path(fileName)))
{
if (verbose) std::cout << "File does not exist : " << fileName << std::endl;
return false;
}
return true;
}
std::vector<float> loadWeights(const std::string weightsFilePath, const std::string& networkType)
{
assert(fileExists(weightsFilePath));
std::cout << "Loading pre-trained weights..." << std::endl;
std::ifstream file(weightsFilePath, std::ios_base::binary);
assert(file.good());
std::string line;
if (networkType == "yolov2")
{
// Skip the 4 int32 header values (16 bytes) at the start of the stream
file.ignore(4 * 4);
}
else if ((networkType == "yolov3") || (networkType == "yolov3-tiny")
|| (networkType == "yolov2-tiny"))
{
// Skip the 5 int32 header values (20 bytes) at the start of the stream
file.ignore(4 * 5);
}
else
{
std::cout << "Invalid network type" << std::endl;
assert(0);
}
std::vector<float> weights;
char floatWeight[4];
while (!file.eof())
{
file.read(floatWeight, 4);
assert(file.gcount() == 4);
weights.push_back(*reinterpret_cast<float *>(floatWeight));
if (file.peek() == std::istream::traits_type::eof()) break;
}
std::cout << "Loading weights of " << networkType << " complete!"
<< std::endl;
std::cout << "Total Number of weights read : " << weights.size() << std::endl;
return weights;
}
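// Editor's note on the headers skipped above (standard darknet .weights layout):
// the file begins with int32 fields major, minor, revision, followed by a "seen"
// counter that is 64-bit in newer format revisions -- hence 5 * 4 bytes skipped
// for the yolov3-family files versus 4 * 4 bytes for older yolov2 files.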
std::string dimsToString(const nvinfer1::Dims d)
{
std::stringstream s;
assert(d.nbDims >= 1);
for (int i = 0; i < d.nbDims - 1; ++i)
{
s << std::setw(4) << d.d[i] << " x";
}
s << std::setw(4) << d.d[d.nbDims - 1];
return s.str();
}
void displayDimType(const nvinfer1::Dims d)
{
std::cout << "(" << d.nbDims << ") ";
for (int i = 0; i < d.nbDims; ++i)
{
switch (d.type[i])
{
case nvinfer1::DimensionType::kSPATIAL: std::cout << "kSPATIAL "; break;
case nvinfer1::DimensionType::kCHANNEL: std::cout << "kCHANNEL "; break;
case nvinfer1::DimensionType::kINDEX: std::cout << "kINDEX "; break;
case nvinfer1::DimensionType::kSEQUENCE: std::cout << "kSEQUENCE "; break;
}
}
std::cout << std::endl;
}
int getNumChannels(nvinfer1::ITensor* t)
{
nvinfer1::Dims d = t->getDimensions();
assert(d.nbDims == 3);
return d.d[0];
}
uint64_t get3DTensorVolume(nvinfer1::Dims inputDims)
{
assert(inputDims.nbDims == 3);
return inputDims.d[0] * inputDims.d[1] * inputDims.d[2];
}
nvinfer1::ILayer* netAddMaxpool(int layerIdx, std::map<std::string, std::string>& block,
nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network)
{
assert(block.at("type") == "maxpool");
assert(block.find("size") != block.end());
assert(block.find("stride") != block.end());
int size = std::stoi(block.at("size"));
int stride = std::stoi(block.at("stride"));
nvinfer1::IPoolingLayer* pool
= network->addPooling(*input, nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{size, size});
assert(pool);
std::string maxpoolLayerName = "maxpool_" + std::to_string(layerIdx);
pool->setStride(nvinfer1::DimsHW{stride, stride});
pool->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
pool->setName(maxpoolLayerName.c_str());
return pool;
}
nvinfer1::ILayer* netAddConvLinear(int layerIdx, std::map<std::string, std::string>& block,
std::vector<float>& weights,
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr,
int& inputChannels, nvinfer1::ITensor* input,
nvinfer1::INetworkDefinition* network)
{
assert(block.at("type") == "convolutional");
assert(block.find("batch_normalize") == block.end());
assert(block.at("activation") == "linear");
assert(block.find("filters") != block.end());
assert(block.find("pad") != block.end());
assert(block.find("size") != block.end());
assert(block.find("stride") != block.end());
int filters = std::stoi(block.at("filters"));
int padding = std::stoi(block.at("pad"));
int kernelSize = std::stoi(block.at("size"));
int stride = std::stoi(block.at("stride"));
int pad;
if (padding)
pad = (kernelSize - 1) / 2;
else
pad = 0;
// load the convolution layer bias
nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, filters};
float* val = new float[filters];
for (int i = 0; i < filters; ++i)
{
val[i] = weights[weightPtr];
weightPtr++;
}
convBias.values = val;
trtWeights.push_back(convBias);
// load the convolutional layer weights
int size = filters * inputChannels * kernelSize * kernelSize;
nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size};
val = new float[size];
for (int i = 0; i < size; ++i)
{
val[i] = weights[weightPtr];
weightPtr++;
}
convWt.values = val;
trtWeights.push_back(convWt);
nvinfer1::IConvolutionLayer* conv = network->addConvolution(
*input, filters, nvinfer1::DimsHW{kernelSize, kernelSize}, convWt, convBias);
assert(conv != nullptr);
std::string convLayerName = "conv_" + std::to_string(layerIdx);
conv->setName(convLayerName.c_str());
conv->setStride(nvinfer1::DimsHW{stride, stride});
conv->setPadding(nvinfer1::DimsHW{pad, pad});
return conv;
}
nvinfer1::ILayer* netAddConvBNLeaky(int layerIdx, std::map<std::string, std::string>& block,
std::vector<float>& weights,
std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr,
int& inputChannels, nvinfer1::ITensor* input,
nvinfer1::INetworkDefinition* network)
{
assert(block.at("type") == "convolutional");
assert(block.find("batch_normalize") != block.end());
assert(block.at("batch_normalize") == "1");
assert(block.at("activation") == "leaky");
assert(block.find("filters") != block.end());
assert(block.find("pad") != block.end());
assert(block.find("size") != block.end());
assert(block.find("stride") != block.end());
bool batchNormalize, bias;
if (block.find("batch_normalize") != block.end())
{
batchNormalize = (block.at("batch_normalize") == "1");
bias = false;
}
else
{
batchNormalize = false;
bias = true;
}
// all conv_bn_leaky layers assume bias is false
assert(batchNormalize == true && bias == false);
UNUSED(batchNormalize);
UNUSED(bias);
int filters = std::stoi(block.at("filters"));
int padding = std::stoi(block.at("pad"));
int kernelSize = std::stoi(block.at("size"));
int stride = std::stoi(block.at("stride"));
int pad;
if (padding)
pad = (kernelSize - 1) / 2;
else
pad = 0;
/***** CONVOLUTION LAYER *****/
/*****************************/
// batch norm weights are before the conv layer
// load BN biases (bn_biases)
std::vector<float> bnBiases;
for (int i = 0; i < filters; ++i)
{
bnBiases.push_back(weights[weightPtr]);
weightPtr++;
}
// load BN weights
std::vector<float> bnWeights;
for (int i = 0; i < filters; ++i)
{
bnWeights.push_back(weights[weightPtr]);
weightPtr++;
}
// load BN running_mean
std::vector<float> bnRunningMean;
for (int i = 0; i < filters; ++i)
{
bnRunningMean.push_back(weights[weightPtr]);
weightPtr++;
}
// load BN running_var
std::vector<float> bnRunningVar;
for (int i = 0; i < filters; ++i)
{
// 1e-05 for numerical stability
bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5));
weightPtr++;
}
// load Conv layer weights (GKCRS)
int size = filters * inputChannels * kernelSize * kernelSize;
nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size};
float* val = new float[size];
for (int i = 0; i < size; ++i)
{
val[i] = weights[weightPtr];
weightPtr++;
}
convWt.values = val;
trtWeights.push_back(convWt);
nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, 0};
trtWeights.push_back(convBias);
nvinfer1::IConvolutionLayer* conv = network->addConvolution(
*input, filters, nvinfer1::DimsHW{kernelSize, kernelSize}, convWt, convBias);
assert(conv != nullptr);
std::string convLayerName = "conv_" + std::to_string(layerIdx);
conv->setName(convLayerName.c_str());
conv->setStride(nvinfer1::DimsHW{stride, stride});
conv->setPadding(nvinfer1::DimsHW{pad, pad});
/***** BATCHNORM LAYER *****/
/***************************/
size = filters;
// create the weights
nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, nullptr, size};
nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, nullptr, size};
nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, size};
float* shiftWt = new float[size];
for (int i = 0; i < size; ++i)
{
shiftWt[i]
= bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i));
}
shift.values = shiftWt;
float* scaleWt = new float[size];
for (int i = 0; i < size; ++i)
{
scaleWt[i] = bnWeights.at(i) / bnRunningVar[i];
}
scale.values = scaleWt;
float* powerWt = new float[size];
for (int i = 0; i < size; ++i)
{
powerWt[i] = 1.0;
}
power.values = powerWt;
trtWeights.push_back(shift);
trtWeights.push_back(scale);
trtWeights.push_back(power);
// Add the batch norm layers
nvinfer1::IScaleLayer* bn = network->addScale(
*conv->getOutput(0), nvinfer1::ScaleMode::kCHANNEL, shift, scale, power);
assert(bn != nullptr);
std::string bnLayerName = "batch_norm_" + std::to_string(layerIdx);
bn->setName(bnLayerName.c_str());
/***** ACTIVATION LAYER *****/
/****************************/
nvinfer1::ITensor* bnOutput = bn->getOutput(0);
nvinfer1::IActivationLayer* leaky = network->addActivation(
*bnOutput, nvinfer1::ActivationType::kLEAKY_RELU);
assert(leaky != nullptr);
leaky->setAlpha(0.1);
std::string leakyLayerName = "leaky_" + std::to_string(layerIdx);
leaky->setName(leakyLayerName.c_str());
return leaky;
}
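// Math recap (editorial, assuming the standard batch-norm definition): the
// IScaleLayer above computes out = (in * scale + shift)^power per channel with
//   scale = gamma / sqrt(running_var + eps)
//   shift = beta - running_mean * gamma / sqrt(running_var + eps)
//   power = 1
// i.e. y = gamma * (x - mean) / sqrt(var + eps) + beta folded into one affine
// transform; note bnRunningVar already stores sqrt(var + eps) when loaded.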
nvinfer1::ILayer* netAddUpsample(int layerIdx, std::map<std::string, std::string>& block,
std::vector<float>& weights,
std::vector<nvinfer1::Weights>& trtWeights, int& inputChannels,
nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network)
{
assert(block.at("type") == "upsample");
nvinfer1::Dims inpDims = input->getDimensions();
assert(inpDims.nbDims == 3);
assert(inpDims.d[1] == inpDims.d[2]);
int h = inpDims.d[1];
int w = inpDims.d[2];
int stride = std::stoi(block.at("stride"));
// add pre multiply matrix as a constant
nvinfer1::Dims preDims{3,
{1, stride * h, w},
{nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kSPATIAL,
nvinfer1::DimensionType::kSPATIAL}};
int size = stride * h * w;
nvinfer1::Weights preMul{nvinfer1::DataType::kFLOAT, nullptr, size};
float* preWt = new float[size];
/* (2*h * w)
[ [1, 0, ..., 0],
[1, 0, ..., 0],
[0, 1, ..., 0],
[0, 1, ..., 0],
...,
...,
[0, 0, ..., 1],
[0, 0, ..., 1] ]
*/
for (int i = 0, idx = 0; i < h; ++i)
{
for (int s = 0; s < stride; ++s)
{
for (int j = 0; j < w; ++j, ++idx)
{
preWt[idx] = (i == j) ? 1.0 : 0.0;
}
}
}
preMul.values = preWt;
trtWeights.push_back(preMul);
nvinfer1::IConstantLayer* preM = network->addConstant(preDims, preMul);
assert(preM != nullptr);
std::string preLayerName = "preMul_" + std::to_string(layerIdx);
preM->setName(preLayerName.c_str());
// add post multiply matrix as a constant
nvinfer1::Dims postDims{3,
{1, h, stride * w},
{nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kSPATIAL,
nvinfer1::DimensionType::kSPATIAL}};
size = stride * h * w;
nvinfer1::Weights postMul{nvinfer1::DataType::kFLOAT, nullptr, size};
float* postWt = new float[size];
/* (h * 2*w)
[ [1, 1, 0, 0, ..., 0, 0],
[0, 0, 1, 1, ..., 0, 0],
...,
...,
[0, 0, 0, 0, ..., 1, 1] ]
*/
for (int i = 0, idx = 0; i < h; ++i)
{
for (int j = 0; j < stride * w; ++j, ++idx)
{
postWt[idx] = (j / stride == i) ? 1.0 : 0.0;
}
}
postMul.values = postWt;
trtWeights.push_back(postMul);
nvinfer1::IConstantLayer* post_m = network->addConstant(postDims, postMul);
assert(post_m != nullptr);
std::string postLayerName = "postMul_" + std::to_string(layerIdx);
post_m->setName(postLayerName.c_str());
// add matrix multiply layers for upsampling
nvinfer1::IMatrixMultiplyLayer* mm1
= network->addMatrixMultiply(*preM->getOutput(0), nvinfer1::MatrixOperation::kNONE, *input,
nvinfer1::MatrixOperation::kNONE);
assert(mm1 != nullptr);
std::string mm1LayerName = "mm1_" + std::to_string(layerIdx);
mm1->setName(mm1LayerName.c_str());
nvinfer1::IMatrixMultiplyLayer* mm2
= network->addMatrixMultiply(*mm1->getOutput(0), nvinfer1::MatrixOperation::kNONE,
*post_m->getOutput(0), nvinfer1::MatrixOperation::kNONE);
assert(mm2 != nullptr);
std::string mm2LayerName = "mm2_" + std::to_string(layerIdx);
mm2->setName(mm2LayerName.c_str());
return mm2;
}
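// Editorial sketch of the trick above (valid for the square inputs asserted at
// the top of this function): nearest-neighbour upsampling by `stride` is written
// as two matrix products, out = Pre * in * Post, where Pre (stride*h x h)
// duplicates every input row `stride` times and Post (w x stride*w) duplicates
// every input column `stride` times, so each pixel expands into a stride x
// stride block without requiring a custom plugin layer.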
void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput,
std::string layerOutput, std::string weightPtr)
{
std::cout << std::setw(6) << std::left << layerIndex << std::setw(15) << std::left << layerName;
std::cout << std::setw(20) << std::left << layerInput << std::setw(20) << std::left
<< layerOutput;
std::cout << std::setw(6) << std::left << weightPtr << std::endl;
}
================================================
FILE: deepstream_plugin_yolov4/trt_utils.h
================================================
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __TRT_UTILS_H__
#define __TRT_UTILS_H__
#include <set>
#include <map>