Full Code of wang-xinyu/tensorrtx for AI

master 2990f34a8502 cached

744 files

5.6 MB

1.5M tokens

2883 symbols

1 requests

Download .txt

Showing preview only (6,027K chars total). Download the full file or copy to clipboard to get everything.

Repository: wang-xinyu/tensorrtx
Branch: master
Commit: 2990f34a8502
Files: 744
Total size: 5.6 MB

Directory structure:
gitextract_0y61g4fh/

├── .clang-format
├── .cmake-format.yaml
├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   └── tensorrtx-issue-template.md
│   ├── stale.yml
│   └── workflows/
│       └── pre-commit.yml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── alexnet/
│   ├── CMakeLists.txt
│   ├── FindTensorRT.cmake
│   ├── README.md
│   ├── alexnet.cc
│   ├── alexnet.py
│   ├── gen_wts.py
│   ├── logging.h
│   ├── macros.h
│   └── utils.h
├── arcface/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── arcface-mobilefacenet.cpp
│   ├── arcface-r100.cpp
│   ├── arcface-r50.cpp
│   ├── gen_wts.py
│   ├── logging.h
│   ├── macros.h
│   ├── prelu.cu
│   └── prelu.h
├── assets/
│   └── 6.pgm
├── centernet/
│   ├── README.md
│   ├── centernet.py
│   ├── dcnv2Plugin/
│   │   ├── CMakeLists.txt
│   │   ├── dcn_v2_im2col_cuda.cu
│   │   ├── dcn_v2_im2col_cuda.h
│   │   ├── dcnv2Plugin.cpp
│   │   └── dcnv2Plugin.h
│   └── sample/
│       ├── common.py
│       └── test.py
├── contributing.md
├── convnextv2/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── config.yaml
│   ├── gen_wts.py
│   ├── inference.py
│   └── src/
│       ├── LayerNormPlugin.cu
│       ├── LayerNormPlugin.h
│       ├── convnextv2.cpp
│       ├── inference_cpp.cpp
│       └── logging.h
├── crnn/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── crnn.cpp
│   ├── genwts.py
│   └── logging.h
├── csrnet/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── config.h
│   ├── csrnet.cpp
│   ├── gen_wts.py
│   ├── logging.h
│   └── macros.h
├── dbnet/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── clipper/
│   │   ├── CMakeLists.txt
│   │   ├── clipper.cpp
│   │   └── clipper.hpp
│   ├── common.hpp
│   ├── dbnet.cpp
│   ├── logging.h
│   └── utils.h
├── densenet/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── densenet121.cpp
│   ├── densenet121.py
│   └── logging.h
├── detr/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── backbone.hpp
│   ├── calibrator.hpp
│   ├── common.hpp
│   ├── detr.cpp
│   ├── gen_wts.py
│   ├── logging.h
│   └── macros.h
├── docker/
│   ├── README.md
│   ├── tensorrtx-docker-compose.yml
│   └── x86_64.dockerfile
├── efficient_ad/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── efficientAD_det.cpp
│   └── src/
│       ├── config.h
│       ├── cuda_utils.h
│       ├── logging.h
│       ├── macros.h
│       ├── model.cpp
│       ├── model.h
│       ├── postprocess.h
│       └── utils.h
├── efficientnet/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── efficientnet.cpp
│   ├── gen_wts.py
│   ├── logging.h
│   └── utils.hpp
├── ghostnet/
│   ├── README.md
│   ├── ghostnetv1/
│   │   ├── CMakeLists.txt
│   │   ├── gen_wts.py
│   │   ├── ghostnetv1.cpp
│   │   └── logging.h
│   └── ghostnetv2/
│       ├── CMakeLists.txt
│       ├── gen_wts.py
│       ├── ghostnetv2.cpp
│       └── logging.h
├── googlenet/
│   ├── CMakeLists.txt
│   ├── FindTensorRT.cmake
│   ├── README.md
│   ├── gen_wts.py
│   ├── googlenet.cpp
│   ├── logging.h
│   ├── macros.h
│   └── utils.h
├── hrnet/
│   ├── hrnet-image-classification/
│   │   ├── CMakeLists.txt
│   │   ├── README.md
│   │   ├── common.hpp
│   │   ├── demo.py
│   │   ├── hrnet.cpp
│   │   └── logging.h
│   └── hrnet-semantic-segmentation/
│       ├── CMakeLists.txt
│       ├── README.md
│       ├── common.hpp
│       ├── gen_wts.py
│       ├── hrnet.cpp
│       ├── hrnet_ocr.cpp
│       ├── hrnet_trt.py
│       └── logging.h
├── ibnnet/
│   ├── CMakeLists.txt
│   ├── InferenceEngine.cpp
│   ├── InferenceEngine.h
│   ├── README.md
│   ├── gen_wts.py
│   ├── holder.h
│   ├── ibnnet.cpp
│   ├── ibnnet.h
│   ├── layers.cpp
│   ├── layers.h
│   ├── logging.h
│   ├── main.cpp
│   ├── utils.cpp
│   └── utils.h
├── inception/
│   ├── inceptionv3/
│   │   ├── CMakeLists.txt
│   │   ├── README.md
│   │   ├── inception_v3.cpp
│   │   └── logging.h
│   └── inceptionv4/
│       ├── CMakeLists.txt
│       ├── README.md
│       ├── inception_v4.cpp
│       ├── inception_v4.h
│       ├── layers_api.cpp
│       ├── layers_api.h
│       ├── logging.h
│       ├── main.cpp
│       ├── utils.cpp
│       └── utils.h
├── lenet/
│   ├── CMakeLists.txt
│   ├── FindTensorRT.cmake
│   ├── README.md
│   ├── gen_wts.py
│   ├── lenet.cpp
│   ├── lenet.py
│   ├── lenet_tripy.py
│   ├── logging.h
│   ├── macros.h
│   └── utils.h
├── lprnet/
│   ├── CMakeLists.txt
│   ├── FindTensorRT.cmake
│   ├── README.md
│   ├── gen_wts.py
│   ├── logging.h
│   ├── lprnet.cpp
│   ├── macros.h
│   └── utils.h
├── mlp/
│   ├── CMakeLists.txt
│   ├── FindTensorRT.cmake
│   ├── README.md
│   ├── logging.h
│   ├── macros.h
│   ├── mlp.cpp
│   ├── mlp.py
│   └── utils.h
├── mnasnet/
│   ├── CMakeLists.txt
│   ├── FindTensorRT.cmake
│   ├── README.md
│   ├── gen_wts.py
│   ├── logging.h
│   ├── macros.h
│   ├── mnasnet.cpp
│   └── utils.h
├── mobilenet/
│   ├── mobilenetv2/
│   │   ├── CMakeLists.txt
│   │   ├── README.md
│   │   ├── logging.h
│   │   ├── mobilenet_v2.cpp
│   │   └── mobilenet_v2.py
│   └── mobilenetv3/
│       ├── CMakeLists.txt
│       ├── README.md
│       ├── logging.h
│       ├── mobilenet_v3.cpp
│       └── mobilenet_v3.py
├── psenet/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── gen_tf_wts.py
│   ├── layers.cpp
│   ├── layers.h
│   ├── main.cpp
│   ├── psenet.cpp
│   ├── psenet.h
│   ├── utils.cpp
│   └── utils.h
├── rcnn/
│   ├── BatchedNms.cu
│   ├── BatchedNmsPlugin.h
│   ├── CMakeLists.txt
│   ├── MaskRcnnInference.cu
│   ├── MaskRcnnInferencePlugin.h
│   ├── PredictorDecode.cu
│   ├── PredictorDecodePlugin.h
│   ├── README.md
│   ├── RoiAlign.cu
│   ├── RoiAlignPlugin.h
│   ├── RpnDecode.cu
│   ├── RpnDecodePlugin.h
│   ├── RpnNms.cu
│   ├── RpnNmsPlugin.h
│   ├── backbone.hpp
│   ├── calibrator.hpp
│   ├── common.hpp
│   ├── cuda_utils.h
│   ├── gen_wts.py
│   ├── logging.h
│   ├── macros.h
│   └── rcnn.cpp
├── real-esrgan/
│   ├── general-x4v3/
│   │   ├── CMakeLists.txt
│   │   ├── README.md
│   │   ├── cmake/
│   │   │   └── FindTensorRT.cmake
│   │   ├── gen_wts.py
│   │   ├── main.cpp
│   │   └── src/
│   │       ├── include/
│   │       │   ├── config/
│   │       │   │   └── config.hpp
│   │       │   ├── cuda_utils.h
│   │       │   ├── logging/
│   │       │   │   └── logging.h
│   │       │   ├── pixel_shuffle/
│   │       │   │   └── pixel_shuffle.hpp
│   │       │   └── preprocess/
│   │       │       └── preprocess.hpp
│   │       └── pixel_shuffle/
│   │           ├── pixel_shuffle.cpp
│   │           └── pixel_shuffle.cu
│   └── x4plus/
│       ├── CMakeLists.txt
│       ├── README.md
│       ├── common.hpp
│       ├── cuda_utils.h
│       ├── gen_wts.py
│       ├── logging.h
│       ├── macros.h
│       ├── postprocess.cu
│       ├── postprocess.hpp
│       ├── preprocess.cu
│       ├── preprocess.hpp
│       ├── real-esrgan.cpp
│       └── utils.h
├── refinedet/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── calibrator.cpp
│   ├── calibrator.h
│   ├── configure.h
│   ├── gen_wts_refinedet.py
│   ├── logging.h
│   ├── refinedet.cpp
│   └── utils.h
├── repvgg/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── gen_wts.py
│   ├── logging.h
│   └── repvgg.cpp
├── resnet/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── logging.h
│   ├── resnet18.cpp
│   ├── resnet34.cpp
│   ├── resnet50.cpp
│   ├── resnet50.py
│   ├── resnext50_32x4d.cpp
│   ├── wide_resnet50.py
│   └── wideresnet50.cpp
├── retinaface/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── calibrator.cpp
│   ├── calibrator.h
│   ├── common.hpp
│   ├── decode.cu
│   ├── decode.h
│   ├── logging.h
│   ├── macros.h
│   ├── retina_mnet.cpp
│   ├── retina_r50.cpp
│   └── retinaface_trt.py
├── retinafaceAntiCov/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── decode.cu
│   ├── decode.h
│   ├── gen_wts.py
│   ├── logging.h
│   ├── macros.h
│   └── retinafaceAntiCov.cpp
├── scaled-yolov4/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── common.hpp
│   ├── gen_wts.py
│   ├── logging.h
│   ├── mish.cu
│   ├── mish.h
│   ├── utils.h
│   ├── yololayer.cu
│   ├── yololayer.h
│   └── yolov4_csp.cpp
├── senet/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── logging.h
│   └── se_resnet50.cpp
├── shufflenetv2/
│   ├── CMakeLists.txt
│   ├── FindTensorRT.cmake
│   ├── README.md
│   ├── gen_wts.py
│   ├── logging.h
│   ├── macros.h
│   ├── shufflenetv2.cpp
│   └── utils.h
├── squeezenet/
│   ├── CMakeLists.txt
│   ├── FindTensorRT.cmake
│   ├── README.md
│   ├── gen_wts.py
│   ├── logging.h
│   ├── macros.h
│   ├── squeezenet.cpp
│   └── utils.h
├── superpoint/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── gen_wts.py
│   ├── logging.h
│   ├── supernet.cpp
│   ├── utils.cpp
│   └── utils.h
├── swin-transformer/
│   └── semantic-segmentation/
│       ├── CMakeLists.txt
│       ├── README.md
│       ├── UpsampleKernel.cu
│       ├── UpsamplePlugin.cpp
│       ├── UpsamplePlugin.h
│       ├── UpsmapleKernel.h
│       ├── common.hpp
│       ├── fillmask.cu
│       ├── fillmask.h
│       ├── gelu.cu
│       ├── gelu.h
│       ├── gen_wts.py
│       ├── include/
│       │   └── dirent.h
│       ├── layerNorm.cu
│       ├── layerNorm.h
│       ├── logging.h
│       ├── main.cpp
│       ├── myhpp.h
│       ├── trainsform.cpp
│       └── utilsn.h
├── tsm/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── demo.sh
│   ├── gen_wts.py
│   ├── logging.h
│   ├── mmaction2_tsm_r50_config.py
│   ├── test_shift.py
│   ├── tsm_r50.cpp
│   └── tsm_r50.py
├── tutorials/
│   ├── check_fp16_int8_support.md
│   ├── faq.md
│   ├── from_pytorch_to_trt_stepbystep_hrnet.md
│   ├── getting_started.md
│   ├── install.md
│   ├── measure_performance.md
│   ├── migration_guide.md
│   ├── multi_GPU_processing.md
│   └── run_on_windows.md
├── ufld/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── common.hpp
│   ├── gen_wts.py
│   ├── lane_det.cpp
│   ├── logging.h
│   ├── macros.h
│   └── pth2onnx.py
├── unet/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── common.hpp
│   ├── gen_wts.py
│   ├── logging.h
│   ├── macros.h
│   └── unet.cpp
├── vgg/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── logging.h
│   └── vgg11.cpp
├── vit/
│   ├── CMakeLists.txt
│   ├── FindTensorRT.cmake
│   ├── README.md
│   ├── cuda_allocator.cc
│   ├── cuda_allocator.h
│   ├── gen_wts.py
│   ├── logging.h
│   ├── macros.h
│   ├── profiler.cc
│   ├── profiler.h
│   ├── utils.h
│   └── vit.cc
├── yolo11/
│   ├── CMakeLists.txt
│   ├── gen_wts.py
│   ├── include/
│   │   ├── block.h
│   │   ├── calibrator.h
│   │   ├── config.h
│   │   ├── cuda_utils.h
│   │   ├── logging.h
│   │   ├── macros.h
│   │   ├── model.h
│   │   ├── postprocess.h
│   │   ├── preprocess.h
│   │   ├── types.h
│   │   └── utils.h
│   ├── plugin/
│   │   ├── yololayer.cu
│   │   └── yololayer.h
│   ├── readme.md
│   ├── src/
│   │   ├── block.cpp
│   │   ├── calibrator.cpp
│   │   ├── model.cpp
│   │   ├── postprocess.cpp
│   │   ├── postprocess.cu
│   │   └── preprocess.cu
│   ├── yolo11_cls.cpp
│   ├── yolo11_cls_trt.py
│   ├── yolo11_det.cpp
│   ├── yolo11_det_trt.py
│   ├── yolo11_obb.cpp
│   ├── yolo11_obb_trt.py
│   ├── yolo11_pose.cpp
│   ├── yolo11_pose_trt.py
│   ├── yolo11_seg.cpp
│   └── yolo11_seg_trt.py
├── yolo11_tripy/
│   ├── .gitignore
│   ├── README.md
│   ├── classify.py
│   ├── compile_classifier.py
│   ├── constants.py
│   ├── model/
│   │   ├── block.py
│   │   └── model.py
│   └── requirements.txt
├── yolo26/
│   ├── .clang-format
│   ├── .gitignore
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── gen_wts.py
│   ├── include/
│   │   ├── block.h
│   │   ├── config.h
│   │   ├── cuda_utils.h
│   │   ├── logging.h
│   │   ├── macros.h
│   │   ├── model.h
│   │   ├── postprocess.h
│   │   ├── preprocess.h
│   │   ├── types.h
│   │   └── utils.h
│   ├── plugin/
│   │   ├── yololayer.cu
│   │   └── yololayer.h
│   ├── src/
│   │   ├── block.cpp
│   │   ├── model.cpp
│   │   ├── postprocess.cpp
│   │   └── preprocess.cu
│   ├── yolo26_cls.cpp
│   ├── yolo26_det.cpp
│   └── yolo26_obb.cpp
├── yolop/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── common.hpp
│   ├── cuda_utils.h
│   ├── gen_wts.py
│   ├── logging.h
│   ├── macros.h
│   ├── utils.h
│   ├── yololayer.cu
│   ├── yololayer.h
│   ├── yolop.cpp
│   ├── yolop.hpp
│   └── yolop_trt.py
├── yolov10/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── gen_wts.py
│   ├── include/
│   │   ├── block.h
│   │   ├── calibrator.h
│   │   ├── config.h
│   │   ├── cuda_utils.h
│   │   ├── logging.h
│   │   ├── macros.h
│   │   ├── model.h
│   │   ├── postprocess.h
│   │   ├── preprocess.h
│   │   ├── types.h
│   │   └── utils.h
│   ├── plugin/
│   │   ├── yololayer.cu
│   │   └── yololayer.h
│   ├── src/
│   │   ├── block.cpp
│   │   ├── calibrator.cpp
│   │   ├── model.cpp
│   │   ├── postprocess.cpp
│   │   └── preprocess.cu
│   ├── yolov10_det.cpp
│   └── yolov10_det_trt.py
├── yolov12/
│   ├── CMakeLists.txt
│   ├── gen_wts.py
│   ├── include/
│   │   ├── block.h
│   │   ├── config.h
│   │   ├── cuda_utils.h
│   │   ├── logging.h
│   │   ├── macros.h
│   │   ├── model.h
│   │   ├── postprocess.h
│   │   ├── preprocess.h
│   │   ├── types.h
│   │   └── utils.h
│   ├── plugin/
│   │   ├── yololayer.cu
│   │   └── yololayer.h
│   ├── readme.md
│   ├── src/
│   │   ├── block.cpp
│   │   ├── model.cpp
│   │   ├── postprocess.cpp
│   │   ├── postprocess.cu
│   │   └── preprocess.cu
│   └── yolo12_det.cpp
├── yolov12-tubro/
│   ├── CMakeLists.txt
│   ├── gen_wts.py
│   ├── include/
│   │   ├── block.h
│   │   ├── calibrator.h
│   │   ├── config.h
│   │   ├── cuda_utils.h
│   │   ├── logging.h
│   │   ├── macros.h
│   │   ├── model.h
│   │   ├── postprocess.h
│   │   ├── preprocess.h
│   │   ├── types.h
│   │   └── utils.h
│   ├── plugin/
│   │   ├── yololayer.cu
│   │   └── yololayer.h
│   ├── readme.md
│   ├── src/
│   │   ├── block.cpp
│   │   ├── calibrator.cpp
│   │   ├── model.cpp
│   │   ├── postprocess.cpp
│   │   ├── postprocess.cu
│   │   └── preprocess.cu
│   ├── yolov12_cls.cpp
│   ├── yolov12_cls_trt.py
│   ├── yolov12_det.cpp
│   ├── yolov12_det_trt.py
│   ├── yolov12_seg.cpp
│   └── yolov12_seg_trt.py
├── yolov13/
│   ├── CMakeLists.txt
│   ├── gen_wts.py
│   ├── include/
│   │   ├── block.h
│   │   ├── calibrator.h
│   │   ├── config.h
│   │   ├── cuda_utils.h
│   │   ├── logging.h
│   │   ├── macros.h
│   │   ├── model.h
│   │   ├── postprocess.h
│   │   ├── preprocess.h
│   │   ├── types.h
│   │   └── utils.h
│   ├── plugin/
│   │   ├── geluKernel.cu
│   │   ├── yololayer.cu
│   │   └── yololayer.h
│   ├── readme.md
│   ├── src/
│   │   ├── block.cpp
│   │   ├── calibrator.cpp
│   │   ├── model.cpp
│   │   ├── postprocess.cpp
│   │   ├── postprocess.cu
│   │   └── preprocess.cu
│   ├── yolov13_det.cpp
│   └── yolov13_det_trt.py
├── yolov3/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── calibrator.cpp
│   ├── calibrator.h
│   ├── gen_wts.py
│   ├── logging.h
│   ├── macros.h
│   ├── utils.h
│   ├── yololayer.cu
│   ├── yololayer.h
│   ├── yolov3.cpp
│   └── yolov3_trt.py
├── yolov3-spp/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── Utils.h
│   ├── gen_wts.py
│   ├── logging.h
│   ├── yololayer.cu
│   ├── yololayer.h
│   └── yolov3-spp.cpp
├── yolov3-tiny/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── gen_wts.py
│   ├── logging.h
│   ├── macros.h
│   ├── utils.h
│   ├── yololayer.cu
│   ├── yololayer.h
│   └── yolov3-tiny.cpp
├── yolov4/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── gen_wts.py
│   ├── logging.h
│   ├── mish.cu
│   ├── mish.h
│   ├── utils.h
│   ├── yololayer.cu
│   ├── yololayer.h
│   └── yolov4.cpp
├── yolov5/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── gen_wts.py
│   ├── plugin/
│   │   ├── yololayer.cu
│   │   └── yololayer.h
│   ├── src/
│   │   ├── calibrator.cpp
│   │   ├── calibrator.h
│   │   ├── config.h
│   │   ├── cuda_utils.h
│   │   ├── logging.h
│   │   ├── macros.h
│   │   ├── model.cpp
│   │   ├── model.h
│   │   ├── postprocess.cpp
│   │   ├── postprocess.h
│   │   ├── preprocess.cu
│   │   ├── preprocess.h
│   │   ├── types.h
│   │   └── utils.h
│   ├── yolov5_cls.cpp
│   ├── yolov5_cls_trt.py
│   ├── yolov5_det.cpp
│   ├── yolov5_det_cuda_python.py
│   ├── yolov5_det_trt.py
│   ├── yolov5_seg.cpp
│   └── yolov5_seg_trt.py
├── yolov5-lite/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── calibrator.cpp
│   ├── common.hpp
│   ├── gen_wts.py
│   ├── v5lite.cpp
│   ├── yololayer.cu
│   └── yolov5-lite-trt.py
├── yolov7/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── gen_wts.py
│   ├── include/
│   │   ├── block.h
│   │   ├── calibrator.h
│   │   ├── config.h
│   │   ├── cuda_utils.h
│   │   ├── logging.h
│   │   ├── macros.h
│   │   ├── model.h
│   │   ├── postprocess.h
│   │   ├── preprocess.h
│   │   ├── types.h
│   │   └── utils.h
│   ├── main.cpp
│   ├── plugin/
│   │   ├── yololayer.cu
│   │   └── yololayer.h
│   ├── src/
│   │   ├── block.cpp
│   │   ├── calibrator.cpp
│   │   ├── model.cpp
│   │   ├── postprocess.cpp
│   │   └── preprocess.cu
│   └── yolov7_trt.py
├── yolov8/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── gen_wts.py
│   ├── include/
│   │   ├── block.h
│   │   ├── calibrator.h
│   │   ├── config.h
│   │   ├── cuda_utils.h
│   │   ├── logging.h
│   │   ├── macros.h
│   │   ├── model.h
│   │   ├── postprocess.h
│   │   ├── preprocess.h
│   │   ├── types.h
│   │   └── utils.h
│   ├── plugin/
│   │   ├── yololayer.cu
│   │   └── yololayer.h
│   ├── src/
│   │   ├── block.cpp
│   │   ├── calibrator.cpp
│   │   ├── model.cpp
│   │   ├── postprocess.cpp
│   │   ├── postprocess.cu
│   │   └── preprocess.cu
│   ├── yolov8_5u_det.cpp
│   ├── yolov8_5u_det_trt.py
│   ├── yolov8_cls.cpp
│   ├── yolov8_cls_trt.py
│   ├── yolov8_det.cpp
│   ├── yolov8_det_trt.py
│   ├── yolov8_obb.cpp
│   ├── yolov8_obb_trt.py
│   ├── yolov8_pose.cpp
│   ├── yolov8_pose_trt.py
│   ├── yolov8_seg.cpp
│   └── yolov8_seg_trt.py
└── yolov9/
    ├── CMakeLists.txt
    ├── README.md
    ├── demo.cpp
    ├── gen_wts.py
    ├── include/
    │   ├── block.h
    │   ├── calibrator.h
    │   ├── config.h
    │   ├── cuda_utils.h
    │   ├── logging.h
    │   ├── macros.h
    │   ├── model.h
    │   ├── postprocess.h
    │   ├── preprocess.h
    │   ├── types.h
    │   └── utils.h
    ├── plugin/
    │   ├── yololayer.cu
    │   └── yololayer.h
    ├── src/
    │   ├── block.cpp
    │   ├── calibrator.cpp
    │   ├── model.cpp
    │   ├── postprocess.cpp
    │   ├── postprocess.cu
    │   └── preprocess.cu
    ├── windows/
    │   └── dirent.h
    └── yolov9_trt.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .clang-format
================================================
# Google C/C++ Code Style settings (with 4-space)
# Referred to https://github.com/kehanXue/google-style-clang-format/blob/master/.clang-format

Language: Cpp
BasedOnStyle: Google
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: None
AlignOperands: Align
AllowAllArgumentsOnNextLine: true
AllowAllConstructorInitializersOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortBlocksOnASingleLine: Empty
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AllowShortIfStatementsOnASingleLine: Never  # Set to "Never" to avoid conflicts; every "if" statement should use braces
AllowShortLambdasOnASingleLine: Inline
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterReturnType: None
AlwaysBreakTemplateDeclarations: Yes
BinPackArguments: true
BreakBeforeBraces: Custom
BraceWrapping:
  AfterCaseLabel: false
  AfterClass: false
  AfterStruct: false
  AfterControlStatement: Never
  AfterEnum: false
  AfterFunction: false
  AfterNamespace: false
  AfterUnion: false
  AfterExternBlock: false
  BeforeCatch: false
  BeforeElse: false
  BeforeLambdaBody: false
  IndentBraces: false
  SplitEmptyFunction: false
  SplitEmptyRecord: false
  SplitEmptyNamespace: false
BreakBeforeBinaryOperators: None
BreakBeforeTernaryOperators: true
BreakConstructorInitializers: BeforeColon
BreakInheritanceList: BeforeColon
ColumnLimit: 120
CompactNamespaces: false
ContinuationIndentWidth: 8
Cpp11BracedListStyle: true
DerivePointerAlignment: false  # Make sure the * or & align on the left
EmptyLineBeforeAccessModifier: LogicalBlock
FixNamespaceComments: true
IncludeBlocks: Preserve
IndentCaseLabels: true
IndentPPDirectives: None
IndentWidth: 4
KeepEmptyLinesAtTheStartOfBlocks: true
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PointerAlignment: Left
ReflowComments: false
# SeparateDefinitionBlocks: Always   # Only support since clang-format 14
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceBeforeSquareBrackets: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInCStyleCastParentheses: false
SpacesInContainerLiterals: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: c++11
TabWidth: 8
UseTab: Never


================================================
FILE: .cmake-format.yaml
================================================
_help_parse: Options affecting listfile parsing
parse:
  _help_additional_commands:
  - Specify structure for custom cmake functions
  additional_commands:
    foo:
      flags:
      - BAR
      - BAZ
      kwargs:
        HEADERS: '*'
        SOURCES: '*'
        DEPENDS: '*'
  _help_override_spec:
  - Override configurations per-command where available
  override_spec: {}
  _help_vartags:
  - Specify variable tags.
  vartags: []
  _help_proptags:
  - Specify property tags.
  proptags: []
_help_format: Options affecting formatting.
format:
  _help_disable:
  - Disable formatting entirely, making cmake-format a no-op
  disable: false
  _help_line_width:
  - How wide to allow formatted cmake files
  line_width: 80
  _help_tab_size:
  - How many spaces to tab for indent
  tab_size: 2
  _help_use_tabchars:
  - If true, lines are indented using tab characters (utf-8
  - 0x09) instead of <tab_size> space characters (utf-8 0x20).
  - In cases where the layout would require a fractional tab
  - character, the behavior of the  fractional indentation is
  - governed by <fractional_tab_policy>
  use_tabchars: false
  _help_fractional_tab_policy:
  - If <use_tabchars> is True, then the value of this variable
  - indicates how fractional indentions are handled during
  - whitespace replacement. If set to 'use-space', fractional
  - indentation is left as spaces (utf-8 0x20). If set to
  - '`round-up` fractional indentation is replaced with a single'
  - tab character (utf-8 0x09) effectively shifting the column
  - to the next tabstop
  fractional_tab_policy: use-space
  _help_max_subgroups_hwrap:
  - If an argument group contains more than this many sub-groups
  - (parg or kwarg groups) then force it to a vertical layout.
  max_subgroups_hwrap: 2
  _help_max_pargs_hwrap:
  - If a positional argument group contains more than this many
  - arguments, then force it to a vertical layout.
  max_pargs_hwrap: 6
  _help_max_rows_cmdline:
  - If a cmdline positional group consumes more than this many
  - lines without nesting, then invalidate the layout (and nest)
  max_rows_cmdline: 2
  _help_separate_ctrl_name_with_space:
  - If true, separate flow control names from their parentheses
  - with a space
  separate_ctrl_name_with_space: false
  _help_separate_fn_name_with_space:
  - If true, separate function names from parentheses with a
  - space
  separate_fn_name_with_space: false
  _help_dangle_parens:
  - If a statement is wrapped to more than one line, then dangle
  - the closing parenthesis on its own line.
  dangle_parens: false
  _help_dangle_align:
  - If the trailing parenthesis must be 'dangled' on its own
  - 'line, then align it to this reference: `prefix`: the start'
  - 'of the statement,  `prefix-indent`: the start of the'
  - 'statement, plus one indentation  level, `child`: align to'
  - the column of the arguments
  dangle_align: prefix
  _help_min_prefix_chars:
  - If the statement spelling length (including space and
  - parenthesis) is smaller than this amount, then force reject
  - nested layouts.
  min_prefix_chars: 4
  _help_max_prefix_chars:
  - If the statement spelling length (including space and
  - parenthesis) is larger than the tab width by more than this
  - amount, then force reject un-nested layouts.
  max_prefix_chars: 10
  _help_max_lines_hwrap:
  - If a candidate layout is wrapped horizontally but it exceeds
  - this many lines, then reject the layout.
  max_lines_hwrap: 2
  _help_line_ending:
  - What style line endings to use in the output.
  line_ending: unix
  _help_command_case:
  - Format command names consistently as 'lower' or 'upper' case
  command_case: canonical
  _help_keyword_case:
  - Format keywords consistently as 'lower' or 'upper' case
  keyword_case: unchanged
  _help_always_wrap:
  - A list of command names which should always be wrapped
  always_wrap: []
  _help_enable_sort:
  - If true, the argument lists which are known to be sortable
  - will be sorted lexicographically
  enable_sort: true
  _help_autosort:
  - If true, the parsers may infer whether or not an argument
  - list is sortable (without annotation).
  autosort: false
  _help_require_valid_layout:
  - By default, if cmake-format cannot successfully fit
  - everything into the desired linewidth it will apply the
  - last, most aggressive attempt that it made. If this flag is
  - True, however, cmake-format will print error, exit with non-
  - zero status code, and write-out nothing
  require_valid_layout: false
  _help_layout_passes:
  - A dictionary mapping layout nodes to a list of wrap
  - decisions. See the documentation for more information.
  layout_passes: {}
_help_markup: Options affecting comment reflow and formatting.
markup:
  _help_bullet_char:
  - What character to use for bulleted lists
  bullet_char: '*'
  _help_enum_char:
  - What character to use as punctuation after numerals in an
  - enumerated list
  enum_char: .
  _help_first_comment_is_literal:
  - If comment markup is enabled, don't reflow the first comment
  - block in each listfile. Use this to preserve formatting of
  - your copyright/license statements.
  first_comment_is_literal: false
  _help_literal_comment_pattern:
  - If comment markup is enabled, don't reflow any comment block
  - which matches this (regex) pattern. Default is `None`
  - (disabled).
  literal_comment_pattern: null
  _help_fence_pattern:
  - Regular expression to match preformat fences in comments
  - default= ``r'^\s*([`~]{3}[`~]*)(.*)$'``
  fence_pattern: ^\s*([`~]{3}[`~]*)(.*)$
  _help_ruler_pattern:
  - Regular expression to match rulers in comments default=
  - '``r''^\s*[^\w\s]{3}.*[^\w\s]{3}$''``'
  ruler_pattern: ^\s*[^\w\s]{3}.*[^\w\s]{3}$
  _help_explicit_trailing_pattern:
  - If a comment line starts with this pattern then it
  - is explicitly a trailing comment for the preceding
  - argument. Default is '#<'
  explicit_trailing_pattern: '#<'
  _help_hashruler_min_length:
  - If a comment line starts with at least this many consecutive
  - hash characters, then don't lstrip() them off. This allows
  - for lazy hash rulers where the first hash char is not
  - separated by space
  hashruler_min_length: 10
  _help_canonicalize_hashrulers:
  - If true, then insert a space between the first hash char and
  - remaining hash chars in a hash ruler, and normalize its
  - length to fill the column
  canonicalize_hashrulers: true
  _help_enable_markup:
  - enable comment markup parsing and reflow
  enable_markup: true
_help_lint: Options affecting the linter
lint:
  _help_disabled_codes:
  - a list of lint codes to disable
  disabled_codes: []
  _help_function_pattern:
  - regular expression pattern describing valid function names
  function_pattern: '[0-9a-z_]+'
  _help_macro_pattern:
  - regular expression pattern describing valid macro names
  macro_pattern: '[0-9A-Z_]+'
  _help_global_var_pattern:
  - regular expression pattern describing valid names for
  - variables with global (cache) scope
  global_var_pattern: '[A-Z][0-9A-Z_]+'
  _help_internal_var_pattern:
  - regular expression pattern describing valid names for
  - variables with global scope (but internal semantic)
  internal_var_pattern: _[A-Z][0-9A-Z_]+
  _help_local_var_pattern:
  - regular expression pattern describing valid names for
  - variables with local scope
  local_var_pattern: '[a-z][a-z0-9_]+'
  _help_private_var_pattern:
  - regular expression pattern describing valid names for
  - private directory variables
  private_var_pattern: _[0-9a-z_]+
  _help_public_var_pattern:
  - regular expression pattern describing valid names for public
  - directory variables
  public_var_pattern: '[A-Z][0-9A-Z_]+'
  _help_argument_var_pattern:
  - regular expression pattern describing valid names for
  - function/macro arguments and loop variables.
  argument_var_pattern: '[a-z][a-z0-9_]+'
  _help_keyword_pattern:
  - regular expression pattern describing valid names for
  - keywords used in functions or macros
  keyword_pattern: '[A-Z][0-9A-Z_]+'
  _help_max_conditionals_custom_parser:
  - In the heuristic for C0201, how many conditionals to match
  - within a loop before considering the loop a parser.
  max_conditionals_custom_parser: 2
  _help_min_statement_spacing:
  - Require at least this many newlines between statements
  min_statement_spacing: 1
  _help_max_statement_spacing:
  - Require no more than this many newlines between statements
  max_statement_spacing: 2
  max_returns: 6
  max_branches: 12
  max_arguments: 5
  max_localvars: 15
  max_statements: 50
_help_encode: Options affecting file encoding
encode:
  _help_emit_byteorder_mark:
  - If true, emit the unicode byte-order mark (BOM) at the start
  - of the file
  emit_byteorder_mark: false
  _help_input_encoding:
  - Specify the encoding of the input file. Defaults to utf-8
  input_encoding: utf-8
  _help_output_encoding:
  - Specify the encoding of the output file. Defaults to utf-8.
  - Note that cmake only claims to support utf-8 so be careful
  - when using anything else
  output_encoding: utf-8
_help_misc: Miscellaneous configuration options.
misc:
  _help_per_command:
  - A dictionary containing any per-command configuration
  - overrides. Currently only `command_case` is supported.
  per_command: {}


================================================
FILE: .github/ISSUE_TEMPLATE/tensorrtx-issue-template.md
================================================
---
name: tensorrtx issue template
about: To understand your issue better
title: ''
labels: ''
assignees: ''

---

## Env

- GPU, e.g. V100, RTX2080, TX2, Xavier NX, Nano, etc.
- OS, e.g. Ubuntu16.04, Win10, etc.
- Cuda version
- TensorRT version

## About this repo

- which branch/tag/commit are you using?
- which model? yolov5, retinaface?

## Your problem

- what is your command? e.g. `sudo ./yolov5 -s`
- what's your output?
- what output do you expect?


================================================
FILE: .github/stale.yml
================================================
# Stale-issue policy for this repository.

# Days without activity after which an issue is flagged as stale
daysUntilStale: 60
# Days a flagged issue may remain inactive before it gets closed
daysUntilClose: 7
# Issues carrying any of these labels are never flagged as stale
exemptLabels: [pinned, security]
# Label applied to an issue when it is flagged as stale
staleLabel: wontfix
# Comment posted when an issue is flagged as stale (`false` disables it).
# Folded scalar: the line breaks below collapse into single spaces.
markComment: >
  This issue has been automatically marked as stale
  because it has not had recent activity.
  It will be closed if no further activity occurs.
  Thank you for your contributions.
# Comment posted when a stale issue is closed (`false` disables it)
closeComment: false


================================================
FILE: .github/workflows/pre-commit.yml
================================================
# Runs the repository's pre-commit hooks on pushes to, and pull requests
# against, the `master` and `trt10` branches. Only the commit range that
# belongs to the push / pull request is checked (via --from-ref / --to-ref).
name: pre-commit

on:
  pull_request:
    branches:
      - master
      - trt10

  push:
    branches:
      - master
      - trt10

jobs:
  pre-commit:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v5
        with:
          # fetch-depth 0 = full history, so both ends of the checked
          # commit range are available locally
          fetch-depth: 0

      # Make sure both ends of the range exist locally. The `before` fetch is
      # allowed to fail (`|| true`), e.g. when the event carries no usable
      # `before` commit.
      - name: Fetch commits
        run: |
          git fetch origin ${{ github.event.before }} || true
          git fetch origin ${{ github.sha }}

      # v5 runs on a supported Node runtime (v4 targets the deprecated Node 16)
      - uses: actions/setup-python@v5

      # push: check the commits between the previous branch tip and the new one
      - uses: pre-commit/action@v3.0.1
        if: github.event_name == 'push'
        with:
          extra_args: >
            --from-ref ${{ github.event.before }}
            --to-ref   ${{ github.sha }}
            --show-diff-on-failure --color=always

      # pull request: check the commits between the PR base and the PR head
      - uses: pre-commit/action@v3.0.1
        if: github.event_name == 'pull_request'
        with:
          extra_args: >
            --from-ref ${{ github.event.pull_request.base.sha }}
            --to-ref   ${{ github.event.pull_request.head.sha }}
            --show-diff-on-failure --color=always


================================================
FILE: .gitignore
================================================
# Project artifacts: model folders, build trees, exported weights, engines
models
build
*.wts
*.engine
*.tpymodel
*/*.ppm
*idea*

# Visual Studio Code: ignore the folder except the shareable settings files
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
!.vscode/*.code-snippets

# Local History for Visual Studio Code
.history/

# Built Visual Studio Code Extensions
*.vsix

# Prerequisites
*.d

# Compiled Object files
*.slo
*.lo
*.o
*.obj

# Precompiled Headers
*.gch
*.pch

# Compiled Dynamic libraries
*.so
*.dylib
*.dll

# Fortran module files
*.mod
*.smod

# Compiled Static libraries
*.lai
*.la
*.a
*.lib

# Executables
*.exe
*.out
*.app

# CMake generated files
CMakeLists.txt.user
CMakeCache.txt
CMakeFiles
CMakeScripts
Testing
cmake_install.cmake
install_manifest.txt
compile_commands.json
CTestTestfile.cmake
_deps
CMakeUserPresets.json


================================================
FILE: .pre-commit-config.yaml
================================================
# pre-commit hook configuration: generic repository hygiene checks,
# clang-format for C/C++/CUDA, flake8 for Python and cmake-format for CMake.
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.5.0
    hooks:
      - id: check-merge-conflict
      - id: check-symlinks
      # whitespace fixers are restricted to Python sources
      - id: end-of-file-fixer
        types: [python]
      - id: trailing-whitespace
        types: [python]
      - id: check-added-large-files
  - repo: https://github.com/pre-commit/mirrors-clang-format
    rev: v18.1.3
    hooks:
      - id: clang-format
        types_or: [c++, c, cuda]
        # -style=file: use the repository's .clang-format
        args: [-style=file]
  - repo: https://github.com/PyCQA/flake8
    rev: 7.0.0
    hooks:
      - id: flake8
        args: [--max-line-length=120]
  - repo: https://github.com/cheshirekow/cmake-format-precommit
    rev: v0.6.13
    hooks:
      - id: cmake-format
        additional_dependencies: [pyyaml]
        args: [--in-place, -c, .cmake-format.yaml]
        types: [file]
        # Matches *.cmake and CMakeLists.txt, each optionally suffixed with
        # ".in". Dots are escaped so they only match a literal ".".
        files: (\.cmake|CMakeLists\.txt)(\.in)?$


================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2019-2020 Wang Xinyu

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: README.md
================================================
# TensorRTx

TensorRTx aims to implement popular deep learning networks with TensorRT network definition API.

Why don't we use a parser (ONNX parser, UFF parser, caffe parser, etc), but use complex APIs to build a network from scratch? I have summarized the advantages in the following aspects.

- **Flexible**, easy to modify the network, add/delete a layer or input/output tensor, replace a layer, merge layers, integrate preprocessing and postprocessing into network, etc.
- **Debuggable**, construct the entire network in an incremental development manner, easy to get middle layer results.
- **Educational**, learn about the network structure during this development, rather than treating everything as a black box.

The basic workflow of TensorRTx is:

1. Get the trained models from pytorch, mxnet or tensorflow, etc. Some pytorch models can be found in my repo [pytorchx](https://github.com/wang-xinyu/pytorchx), the remaining are from popular open-source repos.
2. Export the weights to a plain text file -- [.wts file](./tutorials/getting_started.md#the-wts-content-format).
3. Load weights in TensorRT, define the network, build a TensorRT engine.
4. Load the TensorRT engine and run inference.

## News

- `3 Mar 2026`. [zgjja](https://github.com/zgjja): Add Vision Transformer
- `2 Feb 2026`. [fazligorkembal](https://github.com/fazligorkembal): Yolo26-Det, Yolo26-Obb, Yolo26-Cls
- `15 Jan 2026`. [zgjja](https://github.com/zgjja): Refactor multiple old CV models to support TensorRT SDK versions 7 through 10.
- `8 Jan 2026`. [ydk61](https://github.com/ydk61): YOLOv13
- `10 May 2025`. [pranavm-nvidia](https://github.com/pranavm-nvidia): [YOLO11](./yolo11_tripy) written in [Tripy](https://github.com/NVIDIA/TensorRT-Incubator/tree/main/tripy).
- `2 May 2025`. [fazligorkembal](https://github.com/fazligorkembal): YOLO12
- `12 Apr 2025`. [pranavm-nvidia](https://github.com/pranavm-nvidia): First [Lenet](https://github.com/wang-xinyu/tensorrtx/tree/master/lenet#tripy-new-tensorrt-python-programming-model) example written in [Tripy](https://github.com/NVIDIA/TensorRT-Incubator/tree/main/tripy).
- `11 Apr 2025`. [mpj1234](https://github.com/mpj1234): [YOLO11-obb](https://github.com/wang-xinyu/tensorrtx/tree/master/yolo11)
- `22 Oct 2024`. [lindsayshuo](https://github.com/lindsayshuo): YOLOv8-obb
- `18 Oct 2024`. [zgjja](https://github.com/zgjja): Refactor docker image.
- `11 Oct 2024`. [mpj1234](https://github.com/mpj1234): YOLO11
- `9 Oct 2024`. [Phoenix8215](https://github.com/Phoenix8215): GhostNet V1 and V2.
- `21 Aug 2024`. [Lemonononon](https://github.com/Lemonononon): real-esrgan-general-x4v3
- `29 Jul 2024`. [mpj1234](https://github.com/mpj1234): Check the YOLOv5, YOLOv8 & YOLOv10 in TensorRT 10.x API, branch → [trt10](https://github.com/wang-xinyu/tensorrtx/tree/trt10)
- `29 Jul 2024`. [mpj1234](https://github.com/mpj1234): YOLOv10
- `21 Jun 2024`. [WuxinrongY](https://github.com/WuxinrongY): YOLOv9-T, YOLOv9-S, YOLOv9-M
- `28 Apr 2024`. [lindsayshuo](https://github.com/lindsayshuo): YOLOv8-pose
- `22 Apr 2024`. [B1SH0PP](https://github.com/B1SH0PP): EfficientAd: Accurate Visual Anomaly Detection at Millisecond-Level Latencies.
- `18 Apr 2024`. [lindsayshuo](https://github.com/lindsayshuo): YOLOv8-p2

## Tutorials

- [How to make contribution](./tutorials/contribution.md)
- [Install the dependencies.](./tutorials/install.md)
- [A guide for quickly getting started, taking lenet5 as a demo.](./tutorials/getting_started.md)
- [The .wts file content format](./tutorials/getting_started.md#the-wts-content-format)
- [Frequently Asked Questions (FAQ)](./tutorials/faq.md)
- [Migration Guide](./tutorials/migration_guide.md)
- [How to implement multi-GPU processing, taking YOLOv4 as example](./tutorials/multi_GPU_processing.md)
- [Check if Your GPU supports FP16/INT8](./tutorials/check_fp16_int8_support.md)
- [How to Compile and Run on Windows](./tutorials/run_on_windows.md)
- [Deploy YOLOv4 with Triton Inference Server](https://github.com/isarsoft/yolov4-triton-tensorrt)
- [From pytorch to trt step by step, hrnet as example(Chinese)](./tutorials/from_pytorch_to_trt_stepbystep_hrnet.md)

## Test Environment

1. (**NOT recommended**) TensorRT 7.x
2. (**Recommended**) TensorRT 8.x
3. (**NOT recommended**) TensorRT 10.x

### Note

1. For historical reasons, some of the models are limited to specific TensorRT versions; please check the README.md or code for the model you want to use.
2. Currently, TensorRT 8.x has the best compatibility and supports the most features.

## How to run

**Note**: this project supports building each network with the `CMakeLists.txt` in its subfolder, or building them all together with the top-level `CMakeLists.txt` of this project.

- General procedures before building and running:

```bash
# 1. generate xxx.wts from https://github.com/wang-xinyu/pytorchx/tree/master/lenet
# ...

# 2. put xxx.wts on top of this folder
# ...
```

- (_Option 1_) To build a single subproject in this project, do:

```bash
## enter the subfolder
cd tensorrtx/xxx

## configure & build
cmake -S . -B build
make -C build
```

- (_Option 2_) To build many subprojects, first, in the top-level `CMakeLists.txt`, **uncomment** the projects you don't want to build or that are not supported by your TensorRT version; e.g., you cannot build the subprojects in `${TensorRT_8_Targets}` if your TensorRT is `7.x`. Then:

```bash
## enter the top of this project
cd tensorrtx

## configure & build
# you may use "Ninja" rather than "make" to significantly boost the build speed
cmake -G Ninja -S . -B build
ninja -C build
```

**WARNING**: This part is still under development, most subprojects are not adapted yet.

- run the generated executable, e.g.:

```bash
# serialize model to plan file i.e. 'xxx.engine'
build/xxx -s

# deserialize plan file and run inference
build/xxx -d

# (Optional) check if the output is same as pytorchx/lenet
# ...

# (Optional) customize the project
# ...
```

For more details, each subfolder may contain a `README.md` inside, which explains more.

## Models

The following models are implemented.

| Name                                     | Description                                                                                                                                                                                                                                                       |
| ---------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| [mlp](./mlp)                             | the very basic model for starters, properly documented                                                                                                                                                                                                            |
| [lenet](./lenet)                         | the simplest, as a "hello world" of this project                                                                                                                                                                                                                  |
| [alexnet](./alexnet)                     | easy to implement, all layers are supported in tensorrt                                                                                                                                                                                                           |
| [googlenet](./googlenet)                 | GoogLeNet (Inception v1)                                                                                                                                                                                                                                          |
| [inception](./inception)                 | Inception v3, v4                                                                                                                                                                                                                                                  |
| [mnasnet](./mnasnet)                     | MNASNet with depth multiplier of 0.5 from the paper                                                                                                                                                                                                               |
| [mobilenet](./mobilenet)                 | MobileNet v2, v3-small, v3-large                                                                                                                                                                                                                                  |
| [resnet](./resnet)                       | resnet-18, resnet-50 and resnext50-32x4d are implemented                                                                                                                                                                                                          |
| [senet](./senet)                         | se-resnet50                                                                                                                                                                                                                                                       |
| [shufflenet](./shufflenetv2)             | ShuffleNet v2 with 0.5x output channels                                                                                                                                                                                                                           |
| [squeezenet](./squeezenet)               | SqueezeNet 1.1 model                                                                                                                                                                                                                                              |
| [vgg](./vgg)                             | VGG 11-layer model                                                                                                                                                                                                                                                |
| [ViT](./vit)                             | vision transformer, using weight and model from huggingface                                                                                                                                                                                                       |
| [yolov3-tiny](./yolov3-tiny)             | weights and pytorch implementation from [ultralytics/yolov3](https://github.com/ultralytics/yolov3)                                                                                                                                                               |
| [yolov3](./yolov3)                       | darknet-53, weights and pytorch implementation from [ultralytics/yolov3](https://github.com/ultralytics/yolov3)                                                                                                                                                   |
| [yolov3-spp](./yolov3-spp)               | darknet-53, weights and pytorch implementation from [ultralytics/yolov3](https://github.com/ultralytics/yolov3)                                                                                                                                                   |
| [yolov4](./yolov4)                       | CSPDarknet53, weights from [AlexeyAB/darknet](https://github.com/AlexeyAB/darknet#pre-trained-models), pytorch implementation from [ultralytics/yolov3](https://github.com/ultralytics/yolov3)                                                                    |
| [yolov5](./yolov5)                       | yolov5 v1.0-v7.0 of [ultralytics/yolov5](https://github.com/ultralytics/yolov5), detection, classification and instance segmentation                                                                                                                              |
| [yolov7](./yolov7)                       | yolov7 v0.1, pytorch implementation from [WongKinYiu/yolov7](https://github.com/WongKinYiu/yolov7)                                                                                                                                                                |
| [yolov8](./yolov8)                       | yolov8, pytorch implementation from [ultralytics](https://github.com/ultralytics/ultralytics)                                                                                                                                                                     |
| [yolov9](./yolov9)                       | The Pytorch implementation is [WongKinYiu/yolov9](https://github.com/WongKinYiu/yolov9).                                                                                                                                                                          |
| [yolov10](./yolov10)                     | The Pytorch implementation is [THU-MIG/yolov10](https://github.com/THU-MIG/yolov10).                                                                                                                                                                              |
| [yolo11](./yolo11)                       | The Pytorch implementation is [ultralytics](https://github.com/ultralytics/ultralytics).                                                                                                                                                                          |
| [yolo12](./yolov12)                      | The Pytorch implementation is [ultralytics](https://github.com/ultralytics/ultralytics).                                                                                                                                                                          |
| [yolop](./yolop)                         | yolop, pytorch implementation from [hustvl/YOLOP](https://github.com/hustvl/YOLOP)                                                                                                                                                                                |
| [retinaface](./retinaface)               | resnet50 and mobilnet0.25, weights from [biubug6/Pytorch_Retinaface](https://github.com/biubug6/Pytorch_Retinaface)                                                                                                                                               |
| [arcface](./arcface)                     | LResNet50E-IR, LResNet100E-IR and MobileFaceNet, weights from [deepinsight/insightface](https://github.com/deepinsight/insightface)                                                                                                                               |
| [retinafaceAntiCov](./retinafaceAntiCov) | mobilenet0.25, weights from [deepinsight/insightface](https://github.com/deepinsight/insightface), retinaface anti-COVID-19, detect face and mask attribute                                                                                                       |
| [dbnet](./dbnet)                         | Scene Text Detection, weights from [BaofengZan/DBNet.pytorch](https://github.com/BaofengZan/DBNet.pytorch)                                                                                                                                                        |
| [crnn](./crnn)                           | pytorch implementation from [meijieru/crnn.pytorch](https://github.com/meijieru/crnn.pytorch)                                                                                                                                                                     |
| [ufld](./ufld)                           | pytorch implementation from [Ultra-Fast-Lane-Detection](https://github.com/cfzd/Ultra-Fast-Lane-Detection), ECCV2020                                                                                                                                              |
| [hrnet](./hrnet)                         | hrnet-image-classification and hrnet-semantic-segmentation, pytorch implementation from [HRNet-Image-Classification](https://github.com/HRNet/HRNet-Image-Classification) and [HRNet-Semantic-Segmentation](https://github.com/HRNet/HRNet-Semantic-Segmentation) |
| [psenet](./psenet)                       | PSENet Text Detection, tensorflow implementation from [liuheng92/tensorflow_PSENet](https://github.com/liuheng92/tensorflow_PSENet)                                                                                                                               |
| [ibnnet](./ibnnet)                       | IBN-Net, pytorch implementation from [XingangPan/IBN-Net](https://github.com/XingangPan/IBN-Net), ECCV2018                                                                                                                                                        |
| [unet](./unet)                           | U-Net, pytorch implementation from [milesial/Pytorch-UNet](https://github.com/milesial/Pytorch-UNet)                                                                                                                                                              |
| [repvgg](./repvgg)                       | RepVGG, pytorch implementation from [DingXiaoH/RepVGG](https://github.com/DingXiaoH/RepVGG)                                                                                                                                                                       |
| [lprnet](./lprnet)                       | LPRNet, pytorch implementation from [xuexingyu24/License_Plate_Detection_Pytorch](https://github.com/xuexingyu24/License_Plate_Detection_Pytorch)                                                                                                                 |
| [refinedet](./refinedet)                 | RefineDet, pytorch implementation from [luuuyi/RefineDet.PyTorch](https://github.com/luuuyi/RefineDet.PyTorch)                                                                                                                                                    |
| [densenet](./densenet)                   | DenseNet-121, from torchvision.models                                                                                                                                                                                                                             |
| [rcnn](./rcnn)                           | FasterRCNN and MaskRCNN, model from [detectron2](https://github.com/facebookresearch/detectron2)                                                                                                                                                                  |
| [tsm](./tsm)                             | TSM: Temporal Shift Module for Efficient Video Understanding, ICCV2019                                                                                                                                                                                            |
| [scaled-yolov4](./scaled-yolov4)         | yolov4-csp, pytorch from [WongKinYiu/ScaledYOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4)                                                                                                                                                                    |
| [centernet](./centernet)                 | CenterNet DLA-34, pytorch from [xingyizhou/CenterNet](https://github.com/xingyizhou/CenterNet)                                                                                                                                                                    |
| [efficientnet](./efficientnet)           | EfficientNet b0-b8 and l2, pytorch from [lukemelas/EfficientNet-PyTorch](https://github.com/lukemelas/EfficientNet-PyTorch)                                                                                                                                       |
| [detr](./detr)                           | DE⫶TR, pytorch from [facebookresearch/detr](https://github.com/facebookresearch/detr)                                                                                                                                                                             |
| [swin-transformer](./swin-transformer)   | Swin Transformer - Semantic Segmentation, only support Swin-T. The Pytorch implementation is [microsoft/Swin-Transformer](https://github.com/microsoft/Swin-Transformer.git)                                                                                      |
| [real-esrgan](./real-esrgan)             | Real-ESRGAN. The Pytorch implementation is [real-esrgan](https://github.com/xinntao/Real-ESRGAN)                                                                                                                                                                  |
| [superpoint](./superpoint)               | SuperPoint. The Pytorch model is from [magicleap/SuperPointPretrainedNetwork](https://github.com/magicleap/SuperPointPretrainedNetwork)                                                                                                                           |
| [csrnet](./csrnet)                       | CSRNet. The Pytorch implementation is [leeyeehoo/CSRNet-pytorch](https://github.com/leeyeehoo/CSRNet-pytorch)                                                                                                                                                     |
| [EfficientAd](./efficient_ad)            | EfficientAd: Accurate Visual Anomaly Detection at Millisecond-Level Latencies. From [anomalib](https://github.com/openvinotoolkit/anomalib)                                                                                                                       |

## Model Zoo

The .wts files can be downloaded from model zoo for quick evaluation. But it is recommended to convert .wts from pytorch/mxnet/tensorflow model, so that you can retrain your own model.

[GoogleDrive](https://drive.google.com/drive/folders/1Ri0IDa5OChtcA3zjqRTW57uG6TnfN4Do?usp=sharing) | [BaiduPan](https://pan.baidu.com/s/19s6hO8esU7-TtZEXN7G3OA) pwd: uvv2

## Tricky Operations

Some tricky operations were encountered in these models. They are already solved, but better solutions might exist.

| Name                      | Description                                                                                           |
| ------------------------- | ----------------------------------------------------------------------------------------------------- |
| BatchNorm                 | Implement by a scale layer, used in resnet, googlenet, mobilenet, etc.                                |
| MaxPool2d(ceil_mode=True) | use a padding layer before maxpool to solve ceil_mode=True, see googlenet.                            |
| average pool with padding | use setAverageCountExcludesPadding() when necessary, see inception.                                   |
| relu6                     | use `Relu6(x) = Relu(x) - Relu(x-6)`, see mobilenet.                                                  |
| torch.chunk()             | implement the 'chunk(2, dim=C)' by tensorrt plugin, see shufflenet.                                   |
| channel shuffle           | use two shuffle layers to implement `channel_shuffle`, see shufflenet.                                |
| adaptive pool             | use fixed input dimension, and use regular average pooling, see shufflenet.                           |
| leaky relu                | I wrote a leaky relu plugin, but PRelu in `NvInferPlugin.h` can be used, see yolov3 in branch `trt4`. |
| yolo layer v1             | yolo layer is implemented as a plugin, see yolov3 in branch `trt4`.                                   |
| yolo layer v2             | three yolo layers implemented in one plugin, see yolov3-spp.                                          |
| upsample                  | replaced by a deconvolution layer, see yolov3.                                                        |
| hsigmoid                  | hard sigmoid is implemented as a plugin, hsigmoid and hswish are used in mobilenetv3                  |
| retinaface output decode  | implement a plugin to decode bbox, confidence and landmarks, see retinaface.                          |
| mish                      | mish activation is implemented as a plugin, mish is used in yolov4                                    |
| prelu                     | mxnet's prelu activation with trainable gamma is implemented as a plugin, used in arcface             |
| HardSwish                 | hard_swish = x \* hard_sigmoid, used in yolov5 v3.0                                                   |
| LSTM                      | Implemented pytorch nn.LSTM() with tensorrt api                                                       |

## Speed Benchmark

| Models                    | Device               | BatchSize | Mode | Input Shape(HxW) | FPS  |
| ------------------------- | -------------------- | :-------: | :--: | :--------------: | :--: |
| YOLOv3-tiny               | Xeon E5-2620/GTX1080 |     1     | FP32 |     608x608      | 333  |
| YOLOv3(darknet53)         | Xeon E5-2620/GTX1080 |     1     | FP32 |     608x608      | 39.2 |
| YOLOv3(darknet53)         | Xeon E5-2620/GTX1080 |     1     | INT8 |     608x608      | 71.4 |
| YOLOv3-spp(darknet53)     | Xeon E5-2620/GTX1080 |     1     | FP32 |     608x608      | 38.5 |
| YOLOv4(CSPDarknet53)      | Xeon E5-2620/GTX1080 |     1     | FP32 |     608x608      | 35.7 |
| YOLOv4(CSPDarknet53)      | Xeon E5-2620/GTX1080 |     4     | FP32 |     608x608      | 40.9 |
| YOLOv4(CSPDarknet53)      | Xeon E5-2620/GTX1080 |     8     | FP32 |     608x608      | 41.3 |
| YOLOv5-s v3.0             | Xeon E5-2620/GTX1080 |     1     | FP32 |     608x608      | 142  |
| YOLOv5-s v3.0             | Xeon E5-2620/GTX1080 |     4     | FP32 |     608x608      | 173  |
| YOLOv5-s v3.0             | Xeon E5-2620/GTX1080 |     8     | FP32 |     608x608      | 190  |
| YOLOv5-m v3.0             | Xeon E5-2620/GTX1080 |     1     | FP32 |     608x608      |  71  |
| YOLOv5-l v3.0             | Xeon E5-2620/GTX1080 |     1     | FP32 |     608x608      |  43  |
| YOLOv5-x v3.0             | Xeon E5-2620/GTX1080 |     1     | FP32 |     608x608      |  29  |
| YOLOv5-s v4.0             | Xeon E5-2620/GTX1080 |     1     | FP32 |     608x608      | 142  |
| YOLOv5-m v4.0             | Xeon E5-2620/GTX1080 |     1     | FP32 |     608x608      |  71  |
| YOLOv5-l v4.0             | Xeon E5-2620/GTX1080 |     1     | FP32 |     608x608      |  40  |
| YOLOv5-x v4.0             | Xeon E5-2620/GTX1080 |     1     | FP32 |     608x608      |  27  |
| RetinaFace(resnet50)      | Xeon E5-2620/GTX1080 |     1     | FP32 |     480x640      |  90  |
| RetinaFace(resnet50)      | Xeon E5-2620/GTX1080 |     1     | INT8 |     480x640      | 204  |
| RetinaFace(mobilenet0.25) | Xeon E5-2620/GTX1080 |     1     | FP32 |     480x640      | 417  |
| ArcFace(LResNet50E-IR)    | Xeon E5-2620/GTX1080 |     1     | FP32 |     112x112      | 333  |
| CRNN                      | Xeon E5-2620/GTX1080 |     1     | FP32 |      32x100      | 1000 |

Help wanted: if you have speed results, please open an issue or PR.

## Acknowledgments & Contact

Any contributions, questions and discussions are welcome; you can contact me using the following info.

E-mail: wangxinyu_es@163.com

WeChat ID: wangxinyu0375 (可加我微信进 tensorrtx 交流群，**备注：tensorrtx**)


================================================
FILE: alexnet/CMakeLists.txt
================================================
# Build script for the standalone AlexNet TensorRT sample.
#
# find_package(CUDAToolkit) below relies on the FindCUDAToolkit module, which
# only exists since CMake 3.17, and the included FindTensorRT.cmake declares
# at least that version as well, so anything older cannot configure this
# project.
cmake_minimum_required(VERSION 3.17)

project(
  alexnet
  VERSION 0.1
  LANGUAGES C CXX CUDA)

# Default GPU architectures, used only when the caller did not choose any.
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
  set(CMAKE_CUDA_ARCHITECTURES
      75
      80
      86
      89
      90
      100
      120)
endif()

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_INCLUDE_CURRENT_DIR TRUE)

option(CUDA_USE_STATIC_CUDA_RUNTIME "Use static cudaruntime library" OFF)

find_package(Threads REQUIRED)
find_package(CUDAToolkit REQUIRED)
find_package(OpenCV REQUIRED)

# Reuse an already defined TensorRT::TensorRT target; otherwise create it from
# the FindTensorRT.cmake script that sits next to this file.
if(NOT TARGET TensorRT::TensorRT)
  include(FindTensorRT.cmake)
else()
  message("TensorRT has been found, skipping for ${PROJECT_NAME}")
endif()

add_executable(${PROJECT_NAME} alexnet.cc)

target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_CURRENT_LIST_DIR}
                                                   ${OpenCV_INCLUDE_DIRS})

target_link_libraries(
  ${PROJECT_NAME} PRIVATE Threads::Threads TensorRT::TensorRT CUDA::cudart
                          ${OpenCV_LIBS})


================================================
FILE: alexnet/FindTensorRT.cmake
================================================
# This script creates an ALIAS of a non-GLOBAL IMPORTED target
# (TensorRT::TensorRT), which CMake only supports since 3.18.
cmake_minimum_required(VERSION 3.18.0)

# _guess_path(<var_name> <required_files> <candidate>...)
#
# Filters the candidate directories given after the two named arguments and
# keeps every directory that exists and contains all entries of
# <required_files> (a ;-separated list of paths relative to the candidate).
# The accepted directories are returned, in candidate order, as a list in
# <var_name> in the caller's scope. Aborts the configure step with a
# FATAL_ERROR when no candidate qualifies.
function(_guess_path var_name required_files)
  set(_accepted "")

  foreach(path_entry IN LISTS ARGN)
    if(EXISTS "${path_entry}")
      # Assume the candidate is complete until one required file is missing.
      set(_complete TRUE)
      foreach(required_file IN LISTS required_files)
        if(NOT EXISTS "${path_entry}/${required_file}")
          set(_complete FALSE)
          message(DEBUG "'${path_entry}' missing '${required_file}'")
          break()
        endif()
      endforeach()

      if(NOT _complete)
        message(DEBUG "reject '${path_entry}'")
      else()
        list(APPEND _accepted "${path_entry}")
        message(DEBUG "accept '${path_entry}'")
      endif()
    else()
      message(DEBUG "skip non-existing path '${path_entry}'")
    endif()
  endforeach()

  if("${_accepted}" STREQUAL "")
    message(
      FATAL_ERROR
        "_guess_path(${var_name}) failed: no valid path found. required_files='${required_files}' candidates='${ARGN}'"
    )
  endif()

  set(${var_name}
      "${_accepted}"
      PARENT_SCOPE)
endfunction()

# Define the imported interface target TensorRT (alias TensorRT::TensorRT)
# that carries the TensorRT include directory and libraries.
add_library(TensorRT IMPORTED INTERFACE)
add_library(TensorRT::TensorRT ALIAS TensorRT)

set(TRT_VERSION
    CACHE
      STRING
      "TensorRT version, e.g. \"8.6.1.6\" or \"8.6.1.6+cuda12.0.1.011\", \"8.6.1.6.Windows10.x86_64.cuda-12.0\" etc"
)

# The environment variable is quoted everywhere: unquoted, an unset variable
# expands to nothing and the if() condition only works by accident.
if(NOT TRT_VERSION STREQUAL "" AND NOT "$ENV{TRT_VERSION}" STREQUAL "")
  message(
    WARNING
      "TRT_VERSION defined by cmake and environment variable both, using the later one"
  )
endif()

if(NOT "$ENV{TRT_VERSION}" STREQUAL "")
  set(TRT_VERSION "$ENV{TRT_VERSION}")
endif()

# The major version is the first run of digits. The input is quoted so that an
# empty TRT_VERSION yields an empty match instead of a malformed string() call.
string(REGEX MATCH "([0-9]+)" _match "${TRT_VERSION}")
set(TRT_MAJOR_VERSION "${_match}")
unset(_match)

if("${TRT_MAJOR_VERSION}" STREQUAL "")
  message(
    FATAL_ERROR
      "TRT_VERSION is not set or does not start with a version number (got '${TRT_VERSION}'). "
      "Pass -DTRT_VERSION=<version> to cmake or export the TRT_VERSION environment variable."
  )
endif()

if(WIN32)
  set(TensorRT_DIR "C:/Program Files/TensorRT-${TRT_VERSION}")
  if(NOT EXISTS "${TensorRT_DIR}")
    message(
      FATAL_ERROR
        "TensorRT_DIR=${TensorRT_DIR} does not exist!"
    )
  endif()

  if(TRT_MAJOR_VERSION GREATER_EQUAL 10)
    set(_modules nvinfer_10 nvinfer_plugin_10 nvinfer_vc_plugin_10
                 nvinfer_dispatch_10 nvinfer_lean_10)
    message(DEBUG "Using ${_modules}")
  else()
    set(_modules nvinfer nvinfer_plugin nvinfer_vc_plugin nvinfer_dispatch
                 nvinfer_lean)
  endif()

  set(TensorRT_LIBRARY_DIR "${TensorRT_DIR}/lib")
  set(TensorRT_INCLUDE_DIR "${TensorRT_DIR}/include")
elseif(UNIX)
  string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" _trt_arch)
  set(_trt_include_candidates)
  set(_trt_library_candidates)
  if(_trt_arch MATCHES "^(aarch64|arm64|arch64)$")
    set(_trt_include_candidates "/usr/include/aarch64-linux-gnu" "/usr/include"
                                "/usr/local/cuda/targets/aarch64-linux/include")
    set(_trt_library_candidates
        "/usr/local/tensorrt/targets/aarch64-linux-gnu/lib"
        "/usr/lib/aarch64-linux-gnu" "/usr/lib/aarch64-linux-gnu/tegra"
        "/usr/lib")
  elseif(_trt_arch MATCHES "^(x86_64|amd64)$")
    set(_trt_include_candidates
        "/usr/local/tensorrt/targets/x86_64-linux-gnu/include"
        "/usr/include/x86_64-linux-gnu" "/usr/include")
    set(_trt_library_candidates
        "/usr/local/tensorrt/targets/x86_64-linux-gnu/lib"
        "/usr/lib/x86_64-linux-gnu" "/usr/lib")
  else()
    message(FATAL_ERROR "Unknown architecture")
  endif()

  set(_modules nvinfer nvinfer_plugin)
  if(TRT_MAJOR_VERSION GREATER_EQUAL 8)
    list(APPEND _modules nvinfer_vc_plugin nvinfer_dispatch nvinfer_lean)
  endif()

  _guess_path(TensorRT_LIBRARY_DIR "libnvinfer.so;libnvinfer_plugin.so"
              ${_trt_library_candidates})
  message(STATUS "TensorRT libraries: ${TensorRT_LIBRARY_DIR}")
  _guess_path(TensorRT_INCLUDE_DIR "NvInfer.h" ${_trt_include_candidates})
  message(STATUS "TensorRT includes: ${TensorRT_INCLUDE_DIR}")
endif()

# Resolve every requested module. The core nvinfer library is mandatory; the
# remaining modules are skipped with a warning when the installed TensorRT does
# not ship them, so no "...-NOTFOUND" entry ever reaches the link line.
set(TensorRT_LIBRARIES "")
foreach(lib IN LISTS _modules)
  find_library(
    TensorRT_${lib}_LIBRARY
    NAMES ${lib}
    HINTS ${TensorRT_LIBRARY_DIR})
  if(TensorRT_${lib}_LIBRARY)
    list(APPEND TensorRT_LIBRARIES ${TensorRT_${lib}_LIBRARY})
  elseif(lib MATCHES "^nvinfer(_[0-9]+)?$")
    message(
      FATAL_ERROR
        "Required TensorRT library '${lib}' not found in '${TensorRT_LIBRARY_DIR}'"
    )
  else()
    message(
      WARNING
        "Optional TensorRT library '${lib}' not found in '${TensorRT_LIBRARY_DIR}', skipping it"
    )
  endif()
endforeach()

target_link_libraries(TensorRT INTERFACE ${TensorRT_LIBRARIES})

message(STATUS "Found TensorRT libs: ${TensorRT_LIBRARIES}")

set_target_properties(
  TensorRT
  PROPERTIES C_STANDARD 17
             CXX_STANDARD 17
             POSITION_INDEPENDENT_CODE ON
             SKIP_BUILD_RPATH TRUE
             BUILD_WITH_INSTALL_RPATH TRUE
             INSTALL_RPATH "$ORIGIN"
             INTERFACE_INCLUDE_DIRECTORIES "${TensorRT_INCLUDE_DIR}")

# Do not leak helper variables into the including scope.
unset(TRT_MAJOR_VERSION)
unset(_modules)
unset(_trt_include_candidates)
unset(_trt_library_candidates)
unset(_trt_arch)


================================================
FILE: alexnet/README.md
================================================
# alexnet

## Introduction

AlexNet model architecture comes from this paper: [One weird trick for parallelizing convolutional neural networks](https://arxiv.org/abs/1404.5997). To generate `.wts` file, you can refer to [pytorchx/alexnet](https://github.com/wang-xinyu/pytorchx/tree/master/alexnet). To check the pytorch implementation of AlexNet, refer to [HERE](https://github.com/pytorch/vision/blob/main/torchvision/models/alexnet.py#L17)

AlexNet consists of 3 major parts: features, adaptive average pooling, and classifier:

- features: just several stacked `CRP`(conv-relu-pool) and `CR` layers
- adaptive average pooling: pytorch can decide its inner parameters, but we need to calculate it ourselves in TensorRT API
- classifier: just several `fc-relu` layers. All layers can be implemented by tensorrt api, including `addConvolution`, `addActivation`, `addPooling`, `addMatrixMultiply`, `addElementWise` etc.

## Use AlexNet from PyTorch

We can use torchvision to load the pretrained alexnet model:

```python
alexnet = torchvision.models.alexnet(pretrained=True)
```

The model structure is:

```bash
AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace=True)
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=4096, out_features=4096, bias=True)
    (5): ReLU(inplace=True)
    (6): Linear(in_features=4096, out_features=1000, bias=True)
  )
)
```

## Usage

1. use `gen_wts.py` to generate wts file.

```bash
python3 gen_wts.py
```

2. build C++ code

```bash
pushd tensorrtx/alexnet
cmake -S . -B build -G Ninja --fresh
cmake --build build
```

3. serialize wts model to engine file.

```bash
./build/alexnet -s
```

4. run inference

```bash
./build/alexnet -d
```

output looks like:

```txt
...
====
Execution time: 1ms
0.1234, -0.5678, ...
====
prediction result:
Top: 0 idx: 285, logits: 9.9, label: Egyptian cat
Top: 1 idx: 281, logits: 8.304, label: tabby, tabby cat
Top: 2 idx: 282, logits: 6.859, label: tiger cat
```

## FAQ

### How to align the output with Pytorch?

If your output differs from PyTorch, you have to find out which TensorRT API call or which part of your code causes the difference. A simple approach is to check the `.engine` output part by part, e.g., you can mark an earlier layer of AlexNet as the output:

```c++
fc3_1->getOutput(0)->setName(OUTPUT_NAME);
network->markOutput(*pool3->getOutput(0)); // original is: "*fc3_1->getOutput(0)"
```

With this change, I use the output of the "feature" part of AlexNet and ignore the rest of the model. Then, don't forget to change the output size at the top of the file (`OUTPUT_SIZE`, i.e. the second entry of `SIZES` in `alexnet.cc`). Lastly, rebuild the `.engine` file to apply the changes.

You can sum up all outputs from the C++ code and compare the sum with the PyTorch output; in PyTorch, you can get it with `torch.sum(x)` while debugging. The ideal deviation between the two values would be $[10^{-1}, 10^{-2}]$; for this example, since the "feature" output has $256 * 6 * 6$ elements (batch = 1), the final error would be roughly $10^{-4}$.

Note: this is only a quick check. For a more accurate check, you have to save the output tensors to files and compare them value by value, but this is rarely needed.


================================================
FILE: alexnet/alexnet.cc
================================================
#include <array>
#include <chrono>
#include <cmath>
#include <opencv2/opencv.hpp>
#include <stdexcept>
#include <vector>
#include "logging.h"
#include "utils.h"

// stuff we know about alexnet
constexpr const int32_t N = 1;
constexpr const int32_t INPUT_H = 224;
constexpr const int32_t INPUT_W = 224;
// Per-sample element count of every I/O tensor, index-aligned with NAMES:
// [0] the 3 x INPUT_H x INPUT_W input image, [1] the 1000 output logits.
constexpr const std::array<int64_t, 2> SIZES = {3ll * INPUT_H * INPUT_W, 1000};

// I/O tensor names, in the same order as SIZES
constexpr const std::array<const char*, 2> NAMES = {"data", "prob"};
constexpr const char* ENGINE_PATH = "../models/alexnet.engine";
constexpr const char* WTS_PATH = "../models/alexnet.wts";
constexpr const char* LABELS_PATH = "../assets/imagenet1000_clsidx_to_labels.txt";
// When true, the engine takes a uint8 NHWC image and normalizes it inside the network;
// otherwise the image is preprocessed on the CPU before being uploaded.
static constexpr const bool TRT_PREPROCESS = TRT_VERSION >= 8510;
// per-channel normalization constants handed to the preprocessing helpers
static constexpr const std::array<const float, 3> mean = {0.485f, 0.456f, 0.406f};
static constexpr const std::array<const float, 3> stdv = {0.229f, 0.224f, 0.225f};

using WeightMap = std::map<std::string, Weights>;
using M = nvinfer1::MatrixOperation;
using E = nvinfer1::ElementWiseOperation;
using NDCF = nvinfer1::NetworkDefinitionCreationFlag;

static Logger gLogger;

/**
 * @brief Create the engine using TensorRT API and without any parser.
 *
 * @param N batch size used for the input shape (also the max batch size on TensorRT < 8)
 * @param runtime runtime used to deserialize the freshly built plan (TensorRT >= 8 path only)
 * @param builder builder that creates the network definition and builds it
 * @param config builder config, the workspace limit is set on it here
 * @param dt data type of the input tensor, replaced by kUINT8 when TRT_PREPROCESS is enabled
 * @return ICudaEngine* the built engine, nullptr if the serialized network could not be built
 */
ICudaEngine* createEngine(int32_t N, IRuntime* runtime, IBuilder* builder, IBuilderConfig* config, DataType dt) {
    WeightMap weightMap = loadWeights(WTS_PATH);

#if TRT_VERSION >= 11200
    auto flag = 1U << static_cast<int>(NDCF::kSTRONGLY_TYPED);
#elif TRT_VERSION >= 10000
    auto flag = 0U;
#else
    auto flag = 1U << static_cast<int>(NDCF::kEXPLICIT_BATCH);
#endif
    auto* network = builder->createNetworkV2(flag);
    assert(network);

    // Create input tensor
    ITensor* input{nullptr};
    if constexpr (TRT_PREPROCESS) {
        // uint8 NHWC image in, normalization happens inside the network
        dt = DataType::kUINT8;
        input = network->addInput(NAMES[0], dt, Dims4{N, INPUT_H, INPUT_W, 3});
        assert(input);
        auto* trans = addTransformLayer(network, *input, true, mean, stdv);
        assert(trans);
        input = trans->getOutput(0);
    } else {
        input = network->addInput(NAMES[0], dt, Dims4{N, 3, INPUT_H, INPUT_W});
    }
    assert(input);

    // CRP (Conv-Relu-Pool)
    auto* conv1 = network->addConvolutionNd(*input, 64, DimsHW{11, 11}, weightMap["features.0.weight"],
                                            weightMap["features.0.bias"]);
    assert(conv1);
    auto* relu1 = network->addActivation(*conv1->getOutput(0), ActivationType::kRELU);
    assert(relu1);
    auto* pool1 = network->addPoolingNd(*relu1->getOutput(0), PoolingType::kMAX, DimsHW{3, 3});
    assert(pool1);
    conv1->setStrideNd(DimsHW{4, 4});
    conv1->setPaddingNd(DimsHW{2, 2});
    pool1->setStrideNd(DimsHW{2, 2});

    // CRP
    auto* conv2 = network->addConvolutionNd(*pool1->getOutput(0), 192, DimsHW{5, 5}, weightMap["features.3.weight"],
                                            weightMap["features.3.bias"]);
    assert(conv2);
    auto* relu2 = network->addActivation(*conv2->getOutput(0), ActivationType::kRELU);
    assert(relu2);
    auto* pool2 = network->addPoolingNd(*relu2->getOutput(0), PoolingType::kMAX, DimsHW{3, 3});
    assert(pool2);
    conv2->setPaddingNd(DimsHW{2, 2});
    pool2->setStrideNd(DimsHW{2, 2});

    // CR
    auto* conv3 = network->addConvolutionNd(*pool2->getOutput(0), 384, DimsHW{3, 3}, weightMap["features.6.weight"],
                                            weightMap["features.6.bias"]);
    assert(conv3);
    auto* relu3 = network->addActivation(*conv3->getOutput(0), ActivationType::kRELU);
    assert(relu3);
    conv3->setPaddingNd(DimsHW{1, 1});

    // CR
    auto* conv4 = network->addConvolutionNd(*relu3->getOutput(0), 256, DimsHW{3, 3}, weightMap["features.8.weight"],
                                            weightMap["features.8.bias"]);
    assert(conv4);
    auto* relu4 = network->addActivation(*conv4->getOutput(0), ActivationType::kRELU);
    assert(relu4);
    conv4->setPaddingNd(DimsHW{1, 1});

    // CRP
    auto* conv5 = network->addConvolutionNd(*relu4->getOutput(0), 256, DimsHW{3, 3}, weightMap["features.10.weight"],
                                            weightMap["features.10.bias"]);
    assert(conv5);
    auto* relu5 = network->addActivation(*conv5->getOutput(0), ActivationType::kRELU);
    assert(relu5);
    auto* pool3 = network->addPoolingNd(*relu5->getOutput(0), PoolingType::kMAX, DimsHW{3, 3});
    assert(pool3);
    conv5->setPaddingNd(DimsHW{1, 1});
    pool3->setStrideNd(DimsHW{2, 2});

    // adaptive average pooling, expressed as a 1x1 average pooling window
    auto* adaptive_pool = network->addPoolingNd(*pool3->getOutput(0), PoolingType::kAVERAGE, DimsHW{1, 1});
    assert(adaptive_pool);
    IShuffleLayer* shuffle = network->addShuffle(*adaptive_pool->getOutput(0));
    assert(shuffle);
    shuffle->setReshapeDimensions(Dims2{N, -1});  // "-1" means "256 * 6 * 6"

    // all classifier tensors
    int64_t in_feat = 256ll * 6 * 6;
    auto* fc1w = network->addConstant(DimsHW{4096, in_feat}, weightMap["classifier.1.weight"])->getOutput(0);
    auto* fc1b = network->addConstant(DimsHW{1, 4096}, weightMap["classifier.1.bias"])->getOutput(0);
    auto* fc2w = network->addConstant(DimsHW{4096, 4096}, weightMap["classifier.4.weight"])->getOutput(0);
    auto* fc2b = network->addConstant(DimsHW{1, 4096}, weightMap["classifier.4.bias"])->getOutput(0);
    auto* fc3w = network->addConstant(DimsHW{1000, 4096}, weightMap["classifier.6.weight"])->getOutput(0);
    auto* fc3b = network->addConstant(DimsHW{1, 1000}, weightMap["classifier.6.bias"])->getOutput(0);
    assert(fc1w && fc1b && fc2w && fc2b && fc3w && fc3b);

    // all layers in classifier: each fully connected layer is x * W^T + b
    auto* fc1_0 = network->addMatrixMultiply(*shuffle->getOutput(0), M::kNONE, *fc1w, M::kTRANSPOSE);
    assert(fc1_0);
    auto* fc1_1 = network->addElementWise(*fc1_0->getOutput(0), *fc1b, E::kSUM);
    assert(fc1_1);
    auto* relu6 = network->addActivation(*fc1_1->getOutput(0), ActivationType::kRELU);
    assert(relu6);
    fc1_0->setName("fc1_0");  // set name here, only for debug purpose
    auto* fc2_0 = network->addMatrixMultiply(*relu6->getOutput(0), M::kNONE, *fc2w, M::kTRANSPOSE);
    assert(fc2_0);
    auto* fc2_1 = network->addElementWise(*fc2_0->getOutput(0), *fc2b, E::kSUM);
    assert(fc2_1);
    auto* relu7 = network->addActivation(*fc2_1->getOutput(0), ActivationType::kRELU);
    assert(relu7);
    fc2_0->setName("fc2_0");
    auto* fc3_0 = network->addMatrixMultiply(*relu7->getOutput(0), M::kNONE, *fc3w, M::kTRANSPOSE);
    assert(fc3_0);
    auto* fc3_1 = network->addElementWise(*fc3_0->getOutput(0), *fc3b, E::kSUM);
    assert(fc3_1);
    fc3_0->setName("fc3_0");

    fc3_1->getOutput(0)->setName(NAMES[1]);
    network->markOutput(*fc3_1->getOutput(0));

    // Build engine
#if TRT_VERSION >= 8000
    config->setMemoryPoolLimit(MemoryPoolType::kWORKSPACE, WORKSPACE_SIZE);
    ICudaEngine* engine = nullptr;
    auto* host_mem = builder->buildSerializedNetwork(*network, *config);
    if (host_mem != nullptr) {
        engine = runtime->deserializeCudaEngine(host_mem->data(), host_mem->size());
        // the plan buffer is only needed for deserialization, release it instead of leaking it
        delete host_mem;
    } else {
        std::cerr << "failed to build the serialized network\n";
    }
    delete network;
#else
    builder->setMaxBatchSize(N);
    config->setMaxWorkspaceSize(WORKSPACE_SIZE);
    auto* engine = builder->buildEngineWithConfig(*network, *config);
    network->destroy();
#endif

    std::cout << "build finished\n";
    // the weight buffers are released with free() once the build is over
    for (auto& mem : weightMap) {
        free((void*)(mem.second.values));
    }

    return engine;
}

/**
 * @brief Build the AlexNet engine through the TensorRT API and serialize it.
 *
 * @param N batch size forwarded to createEngine
 * @param runtime runtime forwarded to createEngine
 * @param modelStream receives the serialized engine; set to nullptr when the builder, the
 *                    builder config or the engine could not be created
 */
void APIToModel(int32_t N, IRuntime* runtime, IHostMemory** modelStream) {
    *modelStream = nullptr;

    // Create builder
    IBuilder* builder = createInferBuilder(gLogger);
    if (builder == nullptr) {
        std::cerr << "could not create TensorRT builder\n";
        return;
    }
    IBuilderConfig* config = builder->createBuilderConfig();

    // Create model to populate the network, then set the outputs and create an engine
    ICudaEngine* engine = nullptr;
    if (config != nullptr) {
        engine = createEngine(N, runtime, builder, config, DataType::kFLOAT);
    }

    if (engine != nullptr) {
        // Serialize the engine
        (*modelStream) = engine->serialize();
    } else {
        std::cerr << "could not create TensorRT engine\n";
    }

    // Close everything down
#if TRT_VERSION >= 8000
    delete engine;
    delete config;
    delete builder;
#else
    if (engine != nullptr) {
        engine->destroy();
    }
    if (config != nullptr) {
        config->destroy();
    }
    builder->destroy();
#endif
}

/**
 * @brief Run one inference on the image stored at img_path.
 *
 * Device buffers and the CUDA stream are created and released on every call.
 *
 * @param context execution context of the deserialized engine
 * @param img_path path of the image to classify
 * @param batchSize batch size used to size the device and host buffers
 * @return one vector of floats per output tensor of the engine
 * @throws std::runtime_error if the image cannot be read or the inference cannot be enqueued
 */
std::vector<std::vector<float>> doInference(IExecutionContext& context, const std::string& img_path,
                                            std::size_t batchSize) {
    static std::vector<float> flat_img;
    auto img = cv::imread(img_path, cv::IMREAD_COLOR);
    if (img.empty()) {
        throw std::runtime_error("could not read image: " + img_path);
    }
    void* input = nullptr;

    // use preprocess from gpu(TensorRT) or cpu(OpenCV)
    if constexpr (TRT_PREPROCESS) {
        // for simplicity, resize image on cpu side
        cv::resize(img, img, cv::Size(INPUT_W, INPUT_H), 0, 0, cv::INTER_LINEAR);
        input = static_cast<void*>(img.data);
    } else {
        flat_img = preprocess_img(img, true, mean, stdv, N, INPUT_H, INPUT_W);
        input = flat_img.data();
    }
    assert(input);

    const ICudaEngine& engine = context.getEngine();
    cudaStream_t stream;
    CHECK(cudaStreamCreate(&stream));
    std::vector<void*> buffers;

#if TRT_VERSION >= 8000
    const int32_t nIO = engine.getNbIOTensors();
#else
    const int32_t nIO = engine.getNbBindings();
#endif

    buffers.resize(nIO);  // value-initialized, i.e. every entry starts as nullptr
    for (auto i = 0; i < nIO; ++i) {
#if TRT_VERSION >= 8000
        auto* tensor_name = engine.getIOTensorName(i);
        auto s = getSize(engine.getTensorDataType(tensor_name));
        std::size_t size = s * batchSize * SIZES[i];
        CHECK(cudaMalloc(&buffers[i], size));
        context.setTensorAddress(tensor_name, buffers[i]);
#else
        const int32_t idx = engine.getBindingIndex(NAMES[i]);
        auto s = getSize(engine.getBindingDataType(idx));
        assert(idx == i);
        std::size_t size = s * batchSize * SIZES[i];
        CHECK(cudaMalloc(&buffers[i], size));
#endif
        if (i == 0) {
            CHECK(cudaMemcpyAsync(buffers[i], input, size, cudaMemcpyHostToDevice, stream));
        }
    }

    // The enqueue call must not live inside assert(): with NDEBUG the whole expression,
    // and therefore the inference itself, would be compiled out.
#if TRT_VERSION >= 8000
    const bool enqueued = context.enqueueV3(stream);
#else
    const bool enqueued = context.enqueueV2(buffers.data(), stream, nullptr);
#endif
    if (!enqueued) {
        for (void* buffer : buffers) {
            cudaFree(buffer);
        }
        cudaStreamDestroy(stream);
        throw std::runtime_error("could not enqueue inference");
    }

    // Copy straight into the vectors that are returned, so no host buffer is read or
    // destroyed before the stream has been synchronized.
    std::vector<std::vector<float>> prob;
    prob.reserve(nIO > 1 ? static_cast<std::size_t>(nIO - 1) : 0);
    for (int i = 1; i < nIO; ++i) {
        prob.emplace_back(batchSize * SIZES[i], std::nanf(""));
        std::vector<float>& host_out = prob.back();
        std::size_t size = host_out.size() * sizeof(float);
        CHECK(cudaMemcpyAsync(host_out.data(), buffers[i], size, cudaMemcpyDeviceToHost, stream));
    }
    CHECK(cudaStreamSynchronize(stream));

    cudaStreamDestroy(stream);
    for (auto i = 0; i < nIO; ++i) {
        CHECK(cudaFree(buffers[i]));
    }
    return prob;
}

/**
 * @brief Entry point: "-s" builds the engine and writes it to ENGINE_PATH, "-d" loads the
 *        engine from ENGINE_PATH, runs inference 100 times and prints the top-3 classes.
 *
 * @return 0 on success, -1 on wrong arguments or on any failure
 */
int main(int argc, char** argv) {
    checkTrtEnv();
    if (argc != 2) {
        std::cerr << "arguments not right!\n";
        std::cerr << "./alexnet -s   // serialize model to plan file\n";
        std::cerr << "./alexnet -d   // deserialize plan file and run inference\n";
        return -1;
    }

    IRuntime* runtime = createInferRuntime(gLogger);
    if (runtime == nullptr) {
        std::cerr << "could not create TensorRT runtime\n";
        return -1;
    }

    // create a model using the API directly and serialize it to a stream
    char* trtModelStream{nullptr};
    std::streamsize size{0};

    const std::string mode(argv[1]);
    if (mode == "-s") {
        IHostMemory* modelStream{nullptr};
        APIToModel(N, runtime, &modelStream);
        if (modelStream == nullptr) {
            std::cerr << "could not build the engine\n";
            return -1;
        }

        int ret = 0;
        std::ofstream p(ENGINE_PATH, std::ios::binary | std::ios::trunc);
        if (!p) {
            std::cerr << "could not open plan output file\n";
            ret = -1;
        } else if (modelStream->size() > static_cast<std::size_t>(std::numeric_limits<std::streamsize>::max())) {
            std::cerr << "this model is too large to serialize\n";
            ret = -1;
        } else {
            const auto* data_ptr = reinterpret_cast<const char*>(modelStream->data());
            auto data_size = static_cast<std::streamsize>(modelStream->size());
            p.write(data_ptr, data_size);
            if (!p) {
                std::cerr << "could not write plan output file\n";
                ret = -1;
            }
        }

        // release the serialized engine on every path, not only on success
#if TRT_VERSION >= 8000
        delete modelStream;
#else
        modelStream->destroy();
#endif
        return ret;
    } else if (mode == "-d") {
        std::ifstream file(ENGINE_PATH, std::ios::binary);
        if (!file.good()) {
            std::cerr << "could not open engine file: " << ENGINE_PATH << "\n";
            return -1;
        }
        file.seekg(0, file.end);
        size = file.tellg();
        file.seekg(0, file.beg);
        if (size <= 0) {
            std::cerr << "engine file is empty or unreadable: " << ENGINE_PATH << "\n";
            return -1;
        }
        trtModelStream = new char[size];
        file.read(trtModelStream, size);
        file.close();
    } else {
        std::cerr << "arguments not right!\n";
        return -1;
    }

#if TRT_VERSION >= 8000
    ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size);
#else
    ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size, nullptr);
#endif
    // the plan buffer is not used after deserialization, free it instead of leaking it
    delete[] trtModelStream;
    trtModelStream = nullptr;
    if (engine == nullptr) {
        std::cerr << "could not deserialize engine file: " << ENGINE_PATH << "\n";
        return -1;
    }

    IExecutionContext* context = engine->createExecutionContext();
    if (context == nullptr) {
        std::cerr << "could not create execution context\n";
        return -1;
    }

    const std::string img_path = "../assets/cats.jpg";
    for (int32_t i = 0; i < 100; ++i) {
        // steady_clock is monotonic, so the measured interval cannot be skewed by clock changes
        auto _start = std::chrono::steady_clock::now();
        auto prob = doInference(*context, img_path, N);
        auto _end = std::chrono::steady_clock::now();
        auto _time = std::chrono::duration_cast<std::chrono::milliseconds>(_end - _start).count();
        std::cout << "Execution time: " << _time << "ms\n";

        // print the leading values of every output tensor
        for (const auto& vector : prob) {
            int idx = 0;
            for (auto v : vector) {
                std::cout << std::setprecision(4) << v << ", " << std::flush;
                if (++idx > 20) {
                    std::cout << "\n====\n";
                    break;
                }
            }
        }

        // only the last iteration reports the classification result
        if (i == 99) {
            std::cout << "prediction result:\n";
            auto labels = loadImagenetLabelMap(LABELS_PATH);
            int _top = 0;
            for (auto& [idx, logits] : topk(prob[0], 3)) {
                std::cout << "Top: " << _top++ << " idx: " << idx << ", logits: " << logits
                          << ", label: " << labels[idx] << "\n";
            }
        }
    }

#if TRT_VERSION >= 8000
    delete context;
    delete engine;
    delete runtime;
#else
    context->destroy();
    engine->destroy();
    runtime->destroy();
#endif
    return 0;
}


================================================
FILE: alexnet/alexnet.py
================================================
import os
import sys
import struct
import argparse

import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import tensorrt as trt

# Batch size used both when building the engine and when sizing the dummy input.
BATCH_SIZE = 1
# Height and width of the 3-channel input tensor.
INPUT_H = 224
INPUT_W = 224
OUTPUT_SIZE = 1000
# Names given to the network input tensor and to the marked output tensor.
INPUT_BLOB_NAME = "data"
OUTPUT_BLOB_NAME = "prob"

# Weight file read by create_engine() and plan file written by "-s" / read by "-d".
WEIGHT_PATH = "./alexnet.wts"
ENGINE_PATH = "./alexnet.engine"

# Logger shared by the TensorRT builder and runtime created in this script.
TRT_LOGGER = trt.Logger(trt.Logger.INFO)


def load_weights(file):
    """Parse a ``.wts`` text file into a dict of float32 numpy arrays.

    The first line holds the number of weight records. Every following line
    is ``<name> <count> <hex> <hex> ...`` where each hex word is one
    big-endian IEEE-754 float.

    Args:
        file: path of the ``.wts`` file.

    Returns:
        dict mapping each weight name to a float32 array with one row per
        value (shape ``(count, 1)``).
    """
    print(f"Loading weights: {file}")

    assert os.path.exists(file), 'Unable to load weight file.'

    with open(file, "r") as handle:
        rows = [row.strip() for row in handle]

    # The header must match the number of records that follow it.
    expected = int(rows[0])
    assert expected == len(rows) - 1

    weight_map = {}
    for row in rows[1:expected + 1]:
        fields = row.split(" ")
        name = fields[0]
        assert int(fields[1]) + 2 == len(fields)
        # hex string to bytes to float
        decoded = [struct.unpack(">f", bytes.fromhex(word)) for word in fields[2:]]
        weight_map[name] = np.array(decoded, dtype=np.float32)

    return weight_map


def create_engine(max_batch_size, builder, config, dt):
    """Assemble AlexNet layer by layer with the TensorRT network API and build it.

    Args:
        max_batch_size: value assigned to ``builder.max_batch_size``.
        builder: TensorRT builder used to create and build the network.
        config: builder config handed to ``builder.build_engine``.
        dt: data type of the input tensor.

    Returns:
        The engine returned by ``builder.build_engine``.
    """
    weight_map = load_weights(WEIGHT_PATH)
    network = builder.create_network()

    data = network.add_input(INPUT_BLOB_NAME, dt, (3, INPUT_H, INPUT_W))
    assert data

    def conv_relu(tensor, prefix, channels, ksize, pad, stride=None):
        # Convolution fed by `tensor`, followed by a ReLU; returns the ReLU output.
        conv = network.add_convolution(input=tensor,
                                       num_output_maps=channels,
                                       kernel_shape=(ksize, ksize),
                                       kernel=weight_map[prefix + ".weight"],
                                       bias=weight_map[prefix + ".bias"])
        assert conv
        if stride is not None:
            conv.stride = (stride, stride)
        conv.padding = (pad, pad)

        relu = network.add_activation(conv.get_output(0), type=trt.ActivationType.RELU)
        assert relu
        return relu.get_output(0)

    def max_pool(tensor):
        # 3x3 max pooling with stride 2; returns the pooled tensor.
        pool = network.add_pooling(input=tensor,
                                   type=trt.PoolingType.MAX,
                                   window_size=trt.DimsHW(3, 3))
        assert pool
        pool.stride_nd = (2, 2)
        return pool.get_output(0)

    def dense(tensor, prefix, units):
        # Fully connected layer; the layer itself is returned.
        fc = network.add_fully_connected(input=tensor,
                                         num_outputs=units,
                                         kernel=weight_map[prefix + ".weight"],
                                         bias=weight_map[prefix + ".bias"])
        assert fc
        return fc

    # features
    feat = conv_relu(data, "features.0", 64, 11, 2, stride=4)
    feat = max_pool(feat)
    feat = conv_relu(feat, "features.3", 192, 5, 2)
    feat = max_pool(feat)
    feat = conv_relu(feat, "features.6", 384, 3, 1)
    feat = conv_relu(feat, "features.8", 256, 3, 1)
    feat = conv_relu(feat, "features.10", 256, 3, 1)
    feat = max_pool(feat)

    # classifier
    fc1 = dense(feat, "classifier.1", 4096)
    relu6 = network.add_activation(fc1.get_output(0), type=trt.ActivationType.RELU)
    assert relu6

    fc2 = dense(relu6.get_output(0), "classifier.4", 4096)
    relu7 = network.add_activation(fc2.get_output(0), type=trt.ActivationType.RELU)
    assert relu7

    fc3 = dense(relu7.get_output(0), "classifier.6", 1000)

    fc3.get_output(0).name = OUTPUT_BLOB_NAME
    network.mark_output(fc3.get_output(0))

    # Build Engine
    builder.max_batch_size = max_batch_size
    builder.max_workspace_size = 1 << 20
    engine = builder.build_engine(network, config)

    del network
    del weight_map

    return engine


def API_to_model(max_batch_size):
    """Build the engine with create_engine() and write its serialized form to ENGINE_PATH.

    Args:
        max_batch_size: forwarded to create_engine().
    """
    trt_builder = trt.Builder(TRT_LOGGER)
    builder_config = trt_builder.create_builder_config()
    built = create_engine(max_batch_size, trt_builder, builder_config, trt.float32)
    assert built
    with open(ENGINE_PATH, "wb") as plan_file:
        plan_file.write(built.serialize())

    # Drop the TensorRT objects explicitly, engine first.
    del built
    del trt_builder
    del builder_config


class HostDeviceMem(object):
    """Pair of a host buffer and the device buffer that mirrors it."""

    def __init__(self, host_mem, device_mem):
        self.host, self.device = host_mem, device_mem

    def __str__(self):
        # Same layout as before: a label line followed by each buffer's str().
        return "\n".join(("Host:", str(self.host), "Device:", str(self.device)))

    def __repr__(self):
        return self.__str__()


def allocate_buffers(engine):
    """Allocate one host/device buffer pair for every binding of ``engine``.

    Args:
        engine: engine whose bindings are iterated.

    Returns:
        Tuple ``(inputs, outputs, bindings, stream)``: the HostDeviceMem pairs
        of the input and output bindings, the device addresses of all
        bindings as ints, and a new CUDA stream.
    """
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    for binding in engine:
        n_elems = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        np_dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(n_elems, np_dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        target = inputs if engine.binding_is_input(binding) else outputs
        target.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream


def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    """Run one asynchronous inference pass and return the host output buffers.

    Args:
        context: execution context exposing ``execute_async``.
        bindings: device buffer addresses passed to ``execute_async``.
        inputs: objects with ``host`` / ``device`` attributes to upload.
        outputs: objects with ``host`` / ``device`` attributes to download.
        stream: stream exposing ``handle`` and ``synchronize()``.
        batch_size: batch size passed to ``execute_async``.

    Returns:
        list: the ``host`` attribute of every entry in ``outputs``.
    """
    # Host -> device copies for every input.
    for src in inputs:
        cuda.memcpy_htod_async(src.device, src.host, stream)

    # Enqueue the inference on the same stream.
    context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle)

    # Device -> host copies for every output.
    for dst in outputs:
        cuda.memcpy_dtoh_async(dst.host, dst.device, stream)

    # Wait until all enqueued work has finished before handing the buffers back.
    stream.synchronize()

    host_results = []
    for dst in outputs:
        host_results.append(dst.host)
    return host_results


if __name__ == "__main__":
    # Command line: exactly one of -s (serialize) or -d (deserialize + infer).
    parser = argparse.ArgumentParser()
    parser.add_argument("-s", action='store_true')
    parser.add_argument("-d", action='store_true')
    args = parser.parse_args()

    if not (args.s ^ args.d):
        print(
            "arguments not right!\n"
            "python alexnet.py -s   # serialize model to plan file\n"
            "python alexnet.py -d   # deserialize plan file and run inference"
        )
        # A usage error must not look like success to the calling shell.
        sys.exit(1)

    if args.s:
        API_to_model(BATCH_SIZE)
    else:
        runtime = trt.Runtime(TRT_LOGGER)
        assert runtime

        with open(ENGINE_PATH, "rb") as f:
            engine = runtime.deserialize_cuda_engine(f.read())
        assert engine

        context = engine.create_execution_context()
        assert context

        # Dummy all-ones input covering the whole batch.
        data = np.ones((BATCH_SIZE * 3 * INPUT_H * INPUT_W), dtype=np.float32)
        inputs, outputs, bindings, stream = allocate_buffers(engine)
        inputs[0].host = data

        # Run the full batch that `data` was sized for rather than the
        # do_inference default of a single sample.
        trt_outputs = do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream,
                                   batch_size=BATCH_SIZE)

        print(f'Output: \n{trt_outputs[0][:10]}\n{trt_outputs[0][-10:]}')


================================================
FILE: alexnet/gen_wts.py
================================================
import struct

import cv2
import numpy as np
import torch
from torchvision.models import alexnet


def read_imagenet_labels() -> dict[int, str]:
    """
    Read the ImageNet class-index -> label table.

    Each line of ``../assets/imagenet1000_clsidx_to_labels.txt`` is parsed as
    ``<index>: <quoted label><two trailing chars + newline>``; the first
    character and the last three characters of the value part are stripped.

    Returns:
        dict[int, str]: labels dict keyed by class index (first occurrence wins)
    """
    clsid2label = {}
    with open("../assets/imagenet1000_clsidx_to_labels.txt", "r") as f:
        for line in f:
            # Split on the first ": " only, so a label that itself contains
            # ": " does not break the two-way unpacking.
            k, v = line.split(": ", 1)
            clsid2label.setdefault(int(k), v[1:-3])
    return clsid2label


def preprocess(img: np.array) -> torch.Tensor:
    """
    Apply ImageNet-style preprocessing to a single image.

    Steps, in order: BGR -> RGB, scale to [0, 1] as float32, resize to 224x224
    (bilinear), per-channel mean/std normalization, HWC -> CHW, add batch axis.

    Args:
        img (np.array): input image as accepted by ``cv2.cvtColor`` with ``COLOR_BGR2RGB``

    Returns:
        torch.Tensor: preprocessed image in `NCHW` layout
    """
    channel_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    channel_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)

    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
    resized = cv2.resize(rgb, (224, 224), interpolation=cv2.INTER_LINEAR)
    normalized = (resized - channel_mean) / channel_std

    # HWC -> CHW, then prepend the batch dimension.
    nchw = normalized.transpose(2, 0, 1)[None, ...]
    return torch.from_numpy(nchw)


if __name__ == "__main__":
    # Sanity-check the pretrained model on a sample image, then dump its weights.
    img = cv2.imread("../assets/cats.jpg", cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread signals a missing or unreadable file by returning None.
        raise FileNotFoundError("cannot read image: ../assets/cats.jpg")
    img = preprocess(img)
    model = alexnet(pretrained=True)
    model.eval()
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        output = model(img)
    labels = read_imagenet_labels()
    for batch in torch.topk(output, k=3).indices:
        for i, j in enumerate(batch, 1):
            # NOTE: the printed "confidence" is the raw model output, no softmax is applied.
            print(f"top: {i:<2}, confidence: {float(output[0, j]):.4f}, label: {labels[int(j)]}")

    print("writing alexnet wts")
    state = model.state_dict()
    with open("../models/alexnet.wts", "w") as f:
        # Header line: number of tensors that follow.
        f.write("{}\n".format(len(state.keys())))
        for k, v in state.items():
            print(f"key: {k}\tvalue: {v.shape}")
            vr = v.reshape(-1).cpu().numpy()
            # One line per tensor: "<name> <count> <hex> <hex> ...".
            f.write("{} {}".format(k, len(vr)))
            for vv in vr:
                f.write(" ")
                # Big-endian float32 rendered as 8 hex digits.
                f.write(struct.pack(">f", float(vv)).hex())
            f.write("\n")


================================================
FILE: alexnet/logging.h
================================================
/*
 * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef TENSORRT_LOGGING_H
#define TENSORRT_LOGGING_H

#include <cassert>
#include <cstdint>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <sstream>
#include <string>
#include <utility>
#include "NvInferRuntime.h"
#include "macros.h"

using Severity = nvinfer1::ILogger::Severity;

//!
//! \class LogStreamConsumerBuffer
//! \brief String buffer that, on sync or destruction, writes "[timestamp] <prefix><contents>"
//!        to the wrapped output stream when logging is enabled.
//!
class LogStreamConsumerBuffer : public std::stringbuf {
   public:
    //! \param stream    destination stream for the formatted messages
    //! \param prefix    text inserted between the timestamp and the message
    //! \param shouldLog when false, putOutput() does nothing
    LogStreamConsumerBuffer(std::ostream& stream, std::string prefix, bool shouldLog)
        : mOutput(stream), mPrefix(std::move(prefix)), mShouldLog(shouldLog) {}

    //! Takes over destination, prefix and flag; the buffered characters of \p other are not transferred.
    LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other) noexcept
        : mOutput(other.mOutput), mPrefix(std::move(other.mPrefix)), mShouldLog(other.mShouldLog) {}

    ~LogStreamConsumerBuffer() override {
        // pbase() is the start of the pending output, pptr() the current write position;
        // if they differ there is unflushed text that still has to be emitted.
        if (pbase() != pptr()) {
            putOutput();
        }
    }

    //! Emits the buffered text (see putOutput()) and always reports success (0).
    int sync() override {
        putOutput();
        return 0;
    }

    //! Writes timestamp, prefix and buffered text to the destination stream, then clears the buffer.
    void putOutput() {
        if (mShouldLog) {
            // Format the timestamp in a private stream so that neither the fill character nor
            // any other formatting state leaks into std::cout or the destination stream, and so
            // that the timestamp lands on the same stream as the message it belongs to.
            std::ostringstream stamp;
            const std::time_t timestamp = std::time(nullptr);
            // std::localtime may return nullptr on failure; in that case the stamp is omitted.
            if (const std::tm* tmLocal = std::localtime(&timestamp)) {
                stamp << std::setfill('0') << "[";
                stamp << std::setw(2) << 1 + tmLocal->tm_mon << "/";
                stamp << std::setw(2) << tmLocal->tm_mday << "/";
                stamp << std::setw(4) << 1900 + tmLocal->tm_year << "-";
                stamp << std::setw(2) << tmLocal->tm_hour << ":";
                stamp << std::setw(2) << tmLocal->tm_min << ":";
                stamp << std::setw(2) << tmLocal->tm_sec << "] ";
            }
            // str() returns the buffered message; prepend timestamp and severity prefix.
            mOutput << stamp.str() << mPrefix << str();
            // reset the buffer so the same text is not emitted twice
            str("");
            mOutput.flush();
        }
    }

    void setShouldLog(bool shouldLog) { mShouldLog = shouldLog; }

   private:
    std::ostream& mOutput;
    std::string mPrefix;
    bool mShouldLog;
};

//!
//! \class LogStreamConsumerBase
//! \brief Convenience object used to initialize LogStreamConsumerBuffer before std::ostream in LogStreamConsumer
//!
class LogStreamConsumerBase {
   public:
    //! Forwards all three arguments unchanged to the LogStreamConsumerBuffer member.
    LogStreamConsumerBase(std::ostream& sink, std::string tag, bool enabled)
        : mBuffer(sink, std::move(tag), enabled) {}

   protected:
    //! Buffer owned by this base so that it exists before any later base class is constructed.
    LogStreamConsumerBuffer mBuffer;
};

//!
//! \class LogStreamConsumer
//! \brief Convenience object used to facilitate use of C++ stream syntax when logging messages.
//!  Order of base classes is LogStreamConsumerBase and then std::ostream.
//!  This is because the LogStreamConsumerBase class is used to initialize the LogStreamConsumerBuffer member field
//!  in LogStreamConsumer and then the address of the buffer is passed to std::ostream.
//!  This is necessary to prevent the address of an uninitialized buffer from being passed to std::ostream.
//!  Please do not change the order of the parent classes.
//!
class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream {
   public:
    //! \brief Creates a stream that logs messages of level \p severity.
    //!  The message is emitted only when severity <= reportableSeverity.
    LogStreamConsumer(Severity reportableSeverity, Severity severity)
        : LogStreamConsumerBase(severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity),
          // attach the already-constructed buffer of the first base to the ostream base
          std::ostream(&mBuffer),
          mShouldLog(severity <= reportableSeverity),
          mSeverity(severity) {}

    //! \brief Builds a fresh buffer configured like the one of \p other (same severity and enable flag).
    LogStreamConsumer(LogStreamConsumer&& other) noexcept
        : LogStreamConsumerBase(severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog),
          // attach the already-constructed buffer of the first base to the ostream base
          std::ostream(&mBuffer),
          mShouldLog(other.mShouldLog),
          mSeverity(other.mSeverity) {}

    //! \brief Re-evaluates whether this stream logs and pushes the result down to the buffer.
    void setReportableSeverity(Severity reportableSeverity) {
        mShouldLog = mSeverity <= reportableSeverity;
        mBuffer.setShouldLog(mShouldLog);
    }

   private:
    //! Severities comparing >= kINFO go to std::cout, everything else to std::cerr.
    static std::ostream& severityOstream(Severity severity) {
        if (severity >= Severity::kINFO) {
            return std::cout;
        }
        return std::cerr;
    }

    //! Maps a severity to its bracketed one-letter tag; unknown values assert and yield "".
    static std::string severityPrefix(Severity severity) {
        const char* tag = "";
        switch (severity) {
            case Severity::kINTERNAL_ERROR:
                tag = "[F] ";
                break;
            case Severity::kERROR:
                tag = "[E] ";
                break;
            case Severity::kWARNING:
                tag = "[W] ";
                break;
            case Severity::kINFO:
                tag = "[I] ";
                break;
            case Severity::kVERBOSE:
                tag = "[V] ";
                break;
            default:
                assert(0);
                break;
        }
        return tag;
    }

    bool mShouldLog;
    Severity mSeverity;
};

//! \class Logger
//!
//! \brief Class which manages logging of TensorRT tools and samples
//!
//! \details This class provides a common interface for TensorRT tools and samples to log information to the console,
//! and supports logging two types of messages:
//!
//! - Debugging messages with an associated severity (info, warning, error, or internal error/fatal)
//! - Test pass/fail messages
//!
//! The advantage of having all samples use this class for logging as opposed to emitting directly to stdout/stderr is
//! that the logic for controlling the verbosity and formatting of sample output is centralized in one location.
//!
//! In the future, this class could be extended to support dumping test results to a file in some standard format
//! (for example, JUnit XML), and providing additional metadata (e.g. timing the duration of a test run).
//!
//! TODO: For backwards compatibility with existing samples, this class inherits directly from the nvinfer1::ILogger
//! interface, which is problematic since there isn't a clean separation between messages coming from the TensorRT
//! library and messages coming from the sample.
//!
//! In the future (once all samples are updated to use Logger::getTRTLogger() to access the ILogger) we can refactor the
//! class to eliminate the inheritance and instead make the nvinfer1::ILogger implementation a member of the Logger
//! object.

class Logger : public nvinfer1::ILogger {
   private:
    // Forward declaration: TestAtom's private constructor takes a TestInfo, which is defined
    // in the private section at the end of the class.
    struct TestInfo;

   public:
    //! \brief Creates a logger whose reportable severity defaults to Severity::kWARNING.
    Logger(Severity severity = Severity::kWARNING) : mReportableSeverity(severity) {}

    //!
    //! \enum TestResult
    //! \brief Represents the state of a given test
    //!
    enum class TestResult : std::uint8_t {
        kRUNNING,  //!< The test is running
        kPASSED,   //!< The test passed
        kFAILED,   //!< The test failed
        kWAIVED    //!< The test was waived
    };

    //!
    //! \brief Forward-compatible method for retrieving the nvinfer1::ILogger associated with this Logger
    //! \return The nvinfer1::ILogger associated with this Logger
    //!
    //! TODO Once all samples are updated to use this method to register the logger with TensorRT,
    //! we can eliminate the inheritance of Logger from ILogger
    //!
    nvinfer1::ILogger& getTRTLogger() { return *this; }

    //!
    //! \brief Implementation of the nvinfer1::ILogger::log() virtual method
    //!
    //! Prefixes the message with "[TRT] " and routes it through a temporary LogStreamConsumer,
    //! which decides from the current reportable severity whether anything is printed.
    //!
    //! Note samples should not be calling this function directly; it will eventually go away once we eliminate the
    //! inheritance from nvinfer1::ILogger
    //!
    void log(Severity severity, const char* msg) TRT_NOEXCEPT override {
        LogStreamConsumer(mReportableSeverity, severity) << "[TRT] " << std::string(msg) << '\n';
    }

    //!
    //! \brief Method for controlling the verbosity of logging output
    //!
    //! \param severity The logger will only emit messages that have severity of this level or higher.
    //!
    void setReportableSeverity(Severity severity) { mReportableSeverity = severity; }

    //!
    //! \brief Opaque handle that holds logging information for a particular test
    //!
    //! This object is an opaque handle to information used by the Logger to print test results.
    //! The sample must call Logger::defineTest() in order to obtain a TestAtom that can be used
    //! with Logger::reportTest{Start,End}().
    //!
    class TestAtom {
       public:
        TestAtom(TestAtom&&) = default;

       private:
        friend class Logger;

        // Only Logger can create a TestAtom; name and command line are moved out of \p info.
        TestAtom(bool started, TestInfo info)
            : mStarted(started), mName(std::move(info.name)), mCmdline(std::move(info.cmdline)) {}

        bool mStarted;         // set to true by Logger::reportTestStart()
        std::string mName;     // test name printed in result lines
        std::string mCmdline;  // command line printed in result lines
    };

    //!
    //! \brief Define a test for logging
    //!
    //! \param[in] name The name of the test.  This should be a string starting with
    //!                  "TensorRT" and containing dot-separated strings containing
    //!                  the characters [A-Za-z0-9_].
    //!                  For example, "TensorRT.sample_googlenet"
    //! \param[in] cmdline The command line used to reproduce the test
    //!
    //! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
    //!
    static TestAtom defineTest(const std::string& name, const std::string& cmdline) {
        return TestAtom{false, TestInfo{name, cmdline}};
    }

    //!
    //! \brief A convenience overloaded version of defineTest() that accepts an array of command-line arguments
    //!        as input
    //!
    //! \param[in] name The name of the test
    //! \param[in] argc The number of command-line arguments
    //! \param[in] argv The array of command-line arguments (given as C strings)
    //!
    //! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
    static TestAtom defineTest(const std::string& name, int argc, char const* const* argv) {
        auto cmdline = genCmdlineString(argc, argv);
        return defineTest(name, cmdline);
    }

    //!
    //! \brief Report that a test has started.
    //!
    //! \pre reportTestStart() has not been called yet for the given testAtom
    //!
    //! \param[in] testAtom The handle to the test that has started
    //!
    static void reportTestStart(TestAtom& testAtom) {
        // The RUNNING line is printed before the precondition is asserted.
        reportTestResult(testAtom, TestResult::kRUNNING);
        assert(!testAtom.mStarted);
        testAtom.mStarted = true;
    }

    //!
    //! \brief Report that a test has ended.
    //!
    //! \pre reportTestStart() has been called for the given testAtom
    //!
    //! \param[in] testAtom The handle to the test that has ended
    //! \param[in] result The result of the test. Should be one of TestResult::kPASSED,
    //!                   TestResult::kFAILED, TestResult::kWAIVED
    //!
    static void reportTestEnd(const TestAtom& testAtom, TestResult result) {
        assert(result != TestResult::kRUNNING);
        assert(testAtom.mStarted);
        reportTestResult(testAtom, result);
    }

    //! \brief Reports the test as passed and returns EXIT_SUCCESS.
    static int reportPass(const TestAtom& testAtom) {
        reportTestEnd(testAtom, TestResult::kPASSED);
        return EXIT_SUCCESS;
    }

    //! \brief Reports the test as failed and returns EXIT_FAILURE.
    static int reportFail(const TestAtom& testAtom) {
        reportTestEnd(testAtom, TestResult::kFAILED);
        return EXIT_FAILURE;
    }

    //! \brief Reports the test as waived and returns EXIT_SUCCESS.
    static int reportWaive(const TestAtom& testAtom) {
        reportTestEnd(testAtom, TestResult::kWAIVED);
        return EXIT_SUCCESS;
    }

    //! \brief Reports pass or fail depending on \p pass and returns the matching exit code.
    static int reportTest(const TestAtom& testAtom, bool pass) {
        return pass ? reportPass(testAtom) : reportFail(testAtom);
    }

    //! \brief Returns the severity threshold currently used by log().
    [[nodiscard]] Severity getReportableSeverity() const { return mReportableSeverity; }

   private:
    //! \brief Name and command line handed to TestAtom's constructor.
    struct TestInfo {
        std::string name;
        std::string cmdline;
    };
    //!
    //! \brief returns an appropriate string for prefixing a log message with the given severity
    //!
    static const char* severityPrefix(Severity severity) {
        switch (severity) {
            case Severity::kINTERNAL_ERROR:
                return "[F] ";
            case Severity::kERROR:
                return "[E] ";
            case Severity::kWARNING:
                return "[W] ";
            case Severity::kINFO:
                return "[I] ";
            case Severity::kVERBOSE:
                return "[V] ";
            default:
                assert(0);
                return "";
        }
    }

    //!
    //! \brief returns an appropriate string for prefixing a test result message with the given result
    //!
    static const char* testResultString(TestResult result) {
        switch (result) {
            case TestResult::kRUNNING:
                return "RUNNING";
            case TestResult::kPASSED:
                return "PASSED";
            case TestResult::kFAILED:
                return "FAILED";
            case TestResult::kWAIVED:
                return "WAIVED";
            default:
                assert(0);
                return "";
        }
    }

    //!
    //! \brief returns an appropriate output stream (cout or cerr) to use with the given severity
    //!
    static std::ostream& severityOstream(Severity severity) {
        return severity >= Severity::kINFO ? std::cout : std::cerr;
    }

    //!
    //! \brief method that implements logging test results
    //!
    //! Prints one line of the form "&&&& <RESULT> <name> # <cmdline>" to the stream selected for kINFO.
    //!
    static void reportTestResult(const TestAtom& testAtom, TestResult result) {
        severityOstream(Severity::kINFO) << "&&&& " << testResultString(result) << " " << testAtom.mName << " # "
                                         << testAtom.mCmdline << '\n';
    }

    //!
    //! \brief generate a command line string from the given (argc, argv) values
    //!
    //! The arguments are joined with single spaces; no quoting or escaping is applied.
    //!
    static std::string genCmdlineString(int argc, char const* const* argv) {
        std::stringstream ss;
        for (int i = 0; i < argc; i++) {
            if (i > 0)
                ss << " ";
            ss << argv[i];
        }
        return ss.str();
    }

    Severity mReportableSeverity;  // threshold passed to every LogStreamConsumer created by log()
};

namespace {

//!
//! \brief creates a LogStreamConsumer for messages of severity kVERBOSE, using the logger's
//!        current reportable severity as threshold
//!
//! Example usage:
//!
//!     LOG_VERBOSE(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) {
    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE);
}

//!
//! \brief creates a LogStreamConsumer for messages of severity kINFO, using the logger's
//!        current reportable severity as threshold
//!
//! Example usage:
//!
//!     LOG_INFO(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_INFO(const Logger& logger) {
    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO);
}

//!
//! \brief creates a LogStreamConsumer for messages of severity kWARNING, using the logger's
//!        current reportable severity as threshold
//!
//! Example usage:
//!
//!     LOG_WARN(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_WARN(const Logger& logger) {
    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING);
}

//!
//! \brief creates a LogStreamConsumer for messages of severity kERROR, using the logger's
//!        current reportable severity as threshold
//!
//! Example usage:
//!
//!     LOG_ERROR(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_ERROR(const Logger& logger) {
    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR);
}

//!
//! \brief creates a LogStreamConsumer for messages of severity kINTERNAL_ERROR ("fatal" severity),
//!        using the logger's current reportable severity as threshold
//!
//! Example usage:
//!
//!     LOG_FATAL(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_FATAL(const Logger& logger) {
    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINTERNAL_ERROR);
}

}  // anonymous namespace

#endif  // TENSORRT_LOGGING_H


================================================
FILE: alexnet/macros.h
================================================
#pragma once

#include <NvInfer.h>

// API: symbol-visibility decoration.
//  - API_EXPORTS defined:     dllexport on MSVC, default visibility elsewhere.
//  - API_EXPORTS not defined: dllimport on MSVC, empty elsewhere.
#ifdef API_EXPORTS
#if defined(_MSC_VER)
#define API __declspec(dllexport)
#else
#define API __attribute__((visibility("default")))
#endif
#else

#if defined(_MSC_VER)
#define API __declspec(dllimport)
#else
#define API
#endif
#endif  // API_EXPORTS

// TRT_VERSION: the TensorRT version folded into one integer as
// MAJOR*1000 + MINOR*100 + PATCH*10 + BUILD.
// NOTE: a MINOR, PATCH or BUILD component of 10 or more carries into the next
// higher position, so the value is only meaningful for ordered comparisons
// against thresholds such as 8000.
#define TRT_VERSION \
    ((NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + (NV_TENSORRT_PATCH * 10) + NV_TENSORRT_BUILD)

// From TRT_VERSION 8000 on, TRT_NOEXCEPT expands to `noexcept` and
// TRT_CONST_ENQUEUE to `const`; for older versions both expand to nothing.
#if TRT_VERSION >= 8000
#define TRT_NOEXCEPT noexcept
#define TRT_CONST_ENQUEUE const
#else
#define TRT_NOEXCEPT
#define TRT_CONST_ENQUEUE
#endif


================================================
FILE: alexnet/utils.h
================================================
#pragma once
#include <cuda_runtime_api.h>
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <numeric>
#include <opencv2/opencv.hpp>
#include <string>
#include <vector>
#include "macros.h"

using namespace nvinfer1;

// 16 << 20 == 16 MiB. Presumably the TensorRT builder workspace limit; confirm at the call sites.
constexpr const std::size_t WORKSPACE_SIZE = 16 << 20;

// CHECK(status): evaluates `status` exactly once; if the result differs from
// cudaSuccess, prints "Cuda failure: <code>" to std::cerr and aborts the process.
// Wrapped in do { } while (0) so it behaves like a single statement.
#define CHECK(status)                                     \
    do {                                                  \
        auto ret = (status);                              \
        if (ret != cudaSuccess) {                         \
            std::cerr << "Cuda failure: " << ret << "\n"; \
            std::abort();                                 \
        }                                                 \
    } while (0)

static void checkTrtEnv(int device = 0) {
#if TRT_VERSION < 8000
    CHECK(cudaGetDevice(&device));
    cudaDeviceProp prop{};
    CHECK(cudaGetDeviceProperties(&prop, device));
    const int sm = prop.major * 10 + prop.minor;
    if (sm > 86) {
        std::cerr << "TensorRT < 8 does not support SM > 86 on this GPU.";
        std::abort();
    }
#endif
}

/**
 * @brief Load a TensorRT weight (.wts) text file.
 *
 * Layout: the first token is the number of blobs; every blob is
 * `<name> <count> <count hex-encoded 32-bit words>`, all whitespace separated.
 * Every blob is stored as DataType::kFLOAT.
 *
 * The value arrays are allocated with new[] and are intentionally not freed here:
 * the returned Weights only hold raw pointers, so the caller owns the memory.
 *
 * Any I/O or format error is reported on std::cerr and aborts the process. This
 * also holds in release builds, where assert() would be compiled out.
 *
 * @param file input weight file path
 * @return std::map<std::string, nvinfer1::Weights> blob name -> weights
 */
static auto loadWeights(const std::string& file) {
    std::cout << "Loading weights: " << file << "\n";
    std::map<std::string, nvinfer1::Weights> weightMap;

    // Open weights file
    std::ifstream input(file);
    if (!input.is_open()) {
        std::cerr << "Unable to load weight file: " << file << "\n";
        std::abort();
    }

    // Read number of weight blobs
    int32_t count = 0;
    if (!(input >> count) || count <= 0) {
        std::cerr << "Invalid weight map file: " << file << "\n";
        std::abort();
    }

    while (count--) {
        nvinfer1::Weights wt{nvinfer1::DataType::kFLOAT, nullptr, 0};

        // Read name and element count of the blob
        std::string name;
        if (!(input >> name >> std::dec >> wt.count) || wt.count < 0) {
            std::cerr << "Invalid weight map file (bad blob header): " << file << "\n";
            std::abort();
        }

        // Load blob: each value is the hex bit pattern of one 32-bit word
        auto* val = new uint32_t[wt.count];
        input >> std::hex;
        for (auto x = 0ll; x < wt.count; ++x) {
            input >> val[x];
        }
        // A truncated or malformed blob leaves the stream in a failed state.
        if (!input) {
            std::cerr << "Invalid weight map file (bad data for blob '" << name << "'): " << file << "\n";
            std::abort();
        }
        wt.values = val;
        weightMap[name] = wt;
    }

    return weightMap;
}

/**
 * @brief ImageNet-style preprocessing for a 3-channel image: optional BGR->RGB, bilinear
 * resize to (w, h), scale by 1/255, per-channel (x - mean) / std, then HWC -> planar CHW.
 * The same image is replicated into every batch slot.
 *
 * @note `img` is modified in place (colour conversion, resize, float conversion, normalization).
 *
 * @param img opencv image; aborts unless it has exactly 3 channels
 * @param bgr2rgb whether to convert BGR to RGB
 * @param mean subtract mean
 * @param std divide std
 * @param n batch size
 * @param h resize height
 * @param w resize width
 * @return std::vector<float> contiguous n*3*h*w float32 data in NCHW order
 */
static std::vector<float> preprocess_img(cv::Mat& img, bool bgr2rgb, const std::array<const float, 3>& mean,
                                         const std::array<const float, 3>& std, int n, int h, int w) {
    const auto c = img.channels();
    const auto size = c * h * w;
    if (c != 3) {
        std::cerr << "this demo only supports 3 channel input image.\n";
        std::abort();
    }

    if (bgr2rgb) {
        cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
    }
    cv::resize(img, img, cv::Size(w, h), 0, 0, cv::INTER_LINEAR);
    img.convertTo(img, CV_32FC3, 1.f / 255);
    const cv::Scalar meanScalar(mean[0], mean[1], mean[2]);
    const cv::Scalar stdScalar(std[0], std[1], std[2]);
    img = (img - meanScalar) / stdScalar;

    std::vector<float> chw(static_cast<std::size_t>(n) * c * h * w, 0.f);
    const auto plane = h * w;

    // every batch slot receives a planar copy of the same image
    for (int batch = 0; batch < n; ++batch) {
        const auto base = batch * size;
        for (int row = 0; row < h; ++row) {
            for (int col = 0; col < w; ++col) {
                const cv::Vec3f pixel = img.at<cv::Vec3f>(row, col);
                const auto offset = row * w + col;
                for (int ch = 0; ch < 3; ++ch) {
                    chw[base + ch * plane + offset] = pixel[ch];
                }
            }
        }
    }
    return chw;
}

/**
 * @brief Return the k largest values of `v` together with their indices, largest first.
 *
 * @param v input scores
 * @param k number of entries requested; clamped to v.size()
 * @return std::vector<std::pair<int, float>> (index, value) pairs in descending value order;
 *         empty when k <= 0 or v is empty
 */
static auto topk(const std::vector<float>& v, int k) -> std::vector<std::pair<int, float>> {
    if (k <= 0 || v.empty())
        return {};
    // Never ask for more elements than exist: an iterator past end() is undefined behaviour.
    const auto count = std::min<std::ptrdiff_t>(k, static_cast<std::ptrdiff_t>(v.size()));

    std::vector<int> idx(v.size());
    std::iota(idx.begin(), idx.end(), 0);

    // Only the first `count` positions need to be in sorted order.
    std::partial_sort(idx.begin(), idx.begin() + count, idx.end(), [&v](int a, int b) { return v[a] > v[b]; });

    std::vector<std::pair<int, float>> out;
    out.reserve(static_cast<std::size_t>(count));
    for (std::ptrdiff_t i = 0; i < count; ++i)
        out.emplace_back(idx[i], v[idx[i]]);
    return out;
}

/**
 * @brief Parse a class-index -> label text file with lines of the form
 * `<index>: '<label>'` or `<index>: "<label>"` (anything after the closing quote is ignored).
 *
 * The label is delimited by the first quote character after the colon and the LAST
 * occurrence of that same character on the line, so a double-quoted label may contain
 * apostrophes (e.g. "potter's wheel"). Lines without a colon, without a quoted label,
 * or with a non-numeric index are skipped.
 *
 * @param path label file path
 * @return std::map<int, std::string> index -> label; empty if the file cannot be opened
 */
static std::map<int, std::string> loadImagenetLabelMap(const std::string& path) {
    std::map<int, std::string> labels;
    std::ifstream in(path);
    if (!in.is_open()) {
        return labels;
    }
    std::string line;
    while (std::getline(in, line)) {
        const auto colon = line.find(':');
        if (colon == std::string::npos) {
            continue;
        }
        // The opening delimiter may be either quote style.
        const auto open_quote = line.find_first_of("'\"", colon);
        if (open_quote == std::string::npos) {
            continue;
        }
        // Close on the last matching delimiter so quotes inside the label survive.
        const auto close_quote = line.rfind(line[open_quote]);
        if (close_quote == open_quote) {
            continue;
        }
        int idx = 0;
        try {
            idx = std::stoi(line.substr(0, colon));
        } catch (...) {
            // std::stoi throws std::invalid_argument / std::out_of_range on a bad index.
            continue;
        }
        labels[idx] = line.substr(open_quote + 1, close_quote - open_quote - 1);
    }
    return labels;
}

/**
 * @brief Append input-transform layers to `network`: optional cast to FP32, a
 * {0, 3, 1, 2} transpose (labelled NHWC -> NCHW), an optional channel reversal
 * (labelled BGR -> RGB) and a per-channel scale computing x * 1/(255*std) - mean/std.
 *
 * The shift/scale arrays are kept in a function-local static container so the raw
 * pointers handed to TensorRT stay valid after this function returns.
 *
 * @param network network to append to
 * @param input input tensor; the transpose and the 4-d slices assume it is 4-dimensional
 * @param bgr2rgb whether to reverse the order of the three channels
 * @param mean per-channel mean
 * @param std per-channel standard deviation
 * @return ILayer* the final scale layer
 */
static ILayer* addTransformLayer(INetworkDefinition* network, ITensor& input, bool bgr2rgb,
                                 const std::array<const float, 3>& mean, const std::array<const float, 3>& std) {
    struct ScaleParams {
        std::array<float, 3> shift;
        std::array<float, 3> scale;
    };
    static std::vector<std::unique_ptr<ScaleParams>> gScaleParams;
    auto params = std::make_unique<ScaleParams>();
    params->shift = {-mean[0] / std[0], -mean[1] / std[1], -mean[2] / std[2]};
    params->scale = {1.f / (std[0] * 255.f), 1.f / (std[1] * 255.f), 1.f / (std[2] * 255.f)};

    static const Weights empty{DataType::kFLOAT, nullptr, 0ll};
    const Weights shift{DataType::kFLOAT, params->shift.data(), 3ll};
    const Weights scale{DataType::kFLOAT, params->scale.data(), 3ll};

    // Moving the unique_ptr does not move the heap object, so the pointers above stay valid.
    gScaleParams.emplace_back(std::move(params));

    ITensor* in = &input;
    if (input.getType() != DataType::kFLOAT) {
#if TRT_VERSION >= 8000
        auto* cast = network->addCast(input, DataType::kFLOAT);
        assert(cast);
        cast->setName("Cast to FP32");
        in = cast->getOutput(0);
#else
        auto* identity = network->addIdentity(input);
        assert(identity);
        identity->setName("Convert to FP32");
        identity->setOutputType(0, DataType::kFLOAT);
        in = identity->getOutput(0);
#endif
    }
    // Convert from NHWC to NCHW
    auto* perm = network->addShuffle(*in);
    assert(perm);
    perm->setName("NHWC -> NCHW");
    perm->setFirstTranspose(Permutation{0, 3, 1, 2});

    // Convert from BGR to RGB (optional)
    ITensor* data{nullptr};
    if (bgr2rgb) {
        // Slice out channel `c` as a tensor with a single channel.
        auto add_slice = [&](int c, const char* name) -> ITensor* {
            auto dims = perm->getOutput(0)->getDimensions();
            Dims4 start = {0, c, 0, 0}, stride = {1, 1, 1, 1};
            Dims4 size = {dims.d[0], 1, dims.d[2], dims.d[3]};
            auto* _slice = network->addSlice(*perm->getOutput(0), start, size, stride);
            // Validate the layer before it is dereferenced.
            assert(_slice && _slice->getNbOutputs() == 1);
            _slice->setName(name);
            return _slice->getOutput(0);
        };
        // Re-concatenate the channels in reversed order: 2, 1, 0.
        std::array<ITensor*, 3> channels = {add_slice(2, "R"), add_slice(1, "G"), add_slice(0, "B")};
        auto* cat = network->addConcatenation(channels.data(), 3);
        assert(cat);
        cat->setName("RGB");
        cat->setAxis(1);
        data = cat->getOutput(0);
    } else {
        data = perm->getOutput(0);
    }

    // Normalize
    auto* trans = network->addScale(*data, ScaleMode::kCHANNEL, shift, scale, empty);
    assert(trans);
    trans->setName("mean & std");
#if TRT_VERSION >= 8000
    trans->setChannelAxis(1);
#endif
    return trans;
}

/**
 * @brief Size in bytes of one element of the given TensorRT data type.
 * Handles kINT8, kFLOAT, kHALF, kINT32 and, from TRT_VERSION 8510 on, kUINT8;
 * any other type prints an error and aborts.
 */
static size_t getSize(DataType dt) {
    size_t bytes = 0;
    switch (dt) {
#if TRT_VERSION >= 8510
        case DataType::kUINT8:
#endif
        case DataType::kINT8:
            bytes = sizeof(int8_t);
            break;
        case DataType::kFLOAT:
            bytes = sizeof(float);
            break;
        case DataType::kHALF:
            // half precision is sized via a 16-bit integer
            bytes = sizeof(int16_t);
            break;
        case DataType::kINT32:
            bytes = sizeof(int32_t);
            break;
        default: {
            std::cerr << "Unsupported data type\n";
            std::abort();
        }
    }
    return bytes;
}


================================================
FILE: arcface/CMakeLists.txt
================================================
cmake_minimum_required(VERSION 2.6)

project(arcface)

add_definitions(-std=c++11)

# option() takes <variable> "<help text>" [value]; without a help string the
# literal OFF would be taken as the description instead of the default value.
option(CUDA_USE_STATIC_CUDA_RUNTIME "Link against the static CUDA runtime" OFF)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_BUILD_TYPE Debug)

find_package(CUDA REQUIRED)

include_directories(${PROJECT_SOURCE_DIR}/include)
# CUDA header/library locations differ between aarch64 targets and other hosts.
if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
    message("embed_platform on")
    include_directories(/usr/local/cuda/targets/aarch64-linux/include)
    link_directories(/usr/local/cuda/targets/aarch64-linux/lib)
else()
    message("embed_platform off")
    include_directories(/usr/local/cuda/include)
    link_directories(/usr/local/cuda/lib64)
endif()


set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED")

# PReLU plugin shared by all three executables.
cuda_add_library(myplugins SHARED ${PROJECT_SOURCE_DIR}/prelu.cu)

find_package(OpenCV)
include_directories(${OpenCV_INCLUDE_DIRS})

add_executable(arcface-r50 ${PROJECT_SOURCE_DIR}/arcface-r50.cpp)
target_link_libraries(arcface-r50 nvinfer)
target_link_libraries(arcface-r50 cudart)
target_link_libraries(arcface-r50 myplugins)
target_link_libraries(arcface-r50 ${OpenCV_LIBS})

add_executable(arcface-mobilefacenet ${PROJECT_SOURCE_DIR}/arcface-mobilefacenet.cpp)
target_link_libraries(arcface-mobilefacenet nvinfer)
target_link_libraries(arcface-mobilefacenet cudart)
target_link_libraries(arcface-mobilefacenet myplugins)
target_link_libraries(arcface-mobilefacenet ${OpenCV_LIBS})

add_executable(arcface-r100 ${PROJECT_SOURCE_DIR}/arcface-r100.cpp)
target_link_libraries(arcface-r100 nvinfer)
target_link_libraries(arcface-r100 cudart)
target_link_libraries(arcface-r100 myplugins)
target_link_libraries(arcface-r100 ${OpenCV_LIBS})

add_definitions(-O2 -pthread)



================================================
FILE: arcface/README.md
================================================
# arcface
### TensorRT 8

The mxnet implementation is from [deepinsight/insightface.](https://github.com/deepinsight/insightface)

**Updated Pretrained Weights:** ArcFace-R100 [Insight Face Google Drive](https://drive.google.com/file/d/1Hc5zUfBATaXUgcU2haUNa7dcaZSw95h2/view)

---

**Previous Pre-trained models:** The pretrained models are from [LResNet50E-IR,ArcFace@ms1m-refine-v1](https://github.com/deepinsight/insightface/wiki/Model-Zoo#32-lresnet50e-irarcfacems1m-refine-v1), [LResNet100E-IR,ArcFace@ms1m-refine-v2](https://github.com/deepinsight/insightface/wiki/Model-Zoo#31-lresnet100e-irarcfacems1m-refine-v2) and [MobileFaceNet,ArcFace@ms1m-refine-v1](https://github.com/deepinsight/insightface/wiki/Model-Zoo#34-mobilefacenetarcfacems1m-refine-v1)

---

The two input images used in this project are joey0.ppm and joey1.ppm; download them from [Google Drive](https://drive.google.com/drive/folders/1ctqpkRCRKyBZRCNwo9Uq4eUoMRLtFq1e). Each input image is 112x112 and is generated by `get_input()` in `insightface/deploy/face_model.py`, which produces a cropped and aligned face image.

<p align="center">
<img src="https://user-images.githubusercontent.com/15235574/83122953-f45f8d80-a106-11ea-84b0-4f6ff91b5924.jpg">
</p>

## Config

- FP16/FP32 can be selected by the macro `USE_FP16` in arcface-r50/r100/mobilefacenet.cpp
- GPU id can be selected by the macro `DEVICE` in arcface-r50/r100/mobilefacenet.cpp

## Run

1.Generate the .wts file from the mxnet implementation of the pretrained model. The following example describes how to generate arcface-r100.wts from the mxnet implementation of LResNet100E-IR,ArcFace@ms1m-refine-v2.
```
git clone https://github.com/deepinsight/insightface
cd insightface
git checkout 3866cd77a6896c934b51ed39e9651b791d78bb57
cd deploy
// copy tensorrtx/arcface/gen_wts.py to here(insightface/deploy)
// download model-r100-ii.zip and unzip here(insightface/deploy)
python gen_wts.py
// a file 'arcface-r100.wts' will be generated.
// the master branch of insightface should work, if not, you can checkout 94ad870abb3203d6f31b049b70dd080dc8f33fca
// arcface-r50.wts/arcface-mobilefacenet.wts can be generated in similar way from mxnet implementation of LResNet50E-IR,ArcFace@ms1m-refine-v1/MobileFaceNet,ArcFace@ms1m-refine-v1 pretrained model.

```
2. Put the .wts file into tensorrtx/arcface, then build and run.

```
cd tensorrtx/arcface
// download joey0.ppm and joey1.ppm, and put here(tensorrtx/arcface)
mkdir build
cd build
cmake ..
make
sudo ./arcface-r100 -s    // serialize model to plan file i.e. 'arcface-r100.engine'
sudo ./arcface-r100 -d    // deserialize plan file and run inference

or

sudo ./arcface-r50 -s   // serialize model to plan file i.e. 'arcface-r50.engine'
sudo ./arcface-r50 -d   // deserialize plan file and run inference


or

sudo ./arcface-mobilefacenet -s   // serialize model to plan file i.e. 'arcface-mobilefacenet.engine'
sudo ./arcface-mobilefacenet -d   // deserialize plan file and run inference
```

3. Check the output log for the latency and the similarity score.

## More Information

See the README on the [home page](https://github.com/wang-xinyu/tensorrtx).


================================================
FILE: arcface/arcface-mobilefacenet.cpp
================================================
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <vector>
#include <chrono>
#include <opencv2/opencv.hpp>
#include <dirent.h>
#include "NvInfer.h"
#include "cuda_runtime_api.h"
#include "logging.h"

// Evaluates `status` exactly once. If the result is non-zero, prints it to
// stderr prefixed with "Cuda failure: " and terminates the process via abort().
// Wrapped in do { } while (0) so it behaves as a single statement.
#define CHECK(status) \
    do\
    {\
        auto ret = (status);\
        if (ret != 0)\
        {\
            std::cerr << "Cuda failure: " << ret << std::endl;\
            abort();\
        }\
    } while (0)

//#define USE_FP16  // uncomment to set BuilderFlag::kFP16 in createEngine(); leave commented out for FP32
#define DEVICE 0  // GPU id, passed to cudaSetDevice() in main()
#define BATCH_SIZE 1  // currently, only support BATCH=1; used as max batch size and inference batch size

using namespace nvinfer1;

// stuff we know about the network and the input/output blobs
static const int INPUT_H = 112;   // input height; the network input is declared as {3, INPUT_H, INPUT_W}
static const int INPUT_W = 112;   // input width
static const int OUTPUT_SIZE = 128;  // number of floats produced per image (size of the output buffer)
const char* INPUT_BLOB_NAME = "data";   // name given to the network input tensor and used to look up its binding
const char* OUTPUT_BLOB_NAME = "prob";  // name given to the network output tensor and used to look up its binding
static Logger gLogger;  // logger handed to createInferBuilder() / createInferRuntime()

// TensorRT weight files (.wts) have a simple whitespace delimited format:
//   <number of blobs>
//   [name] [size] <data x size in hex>
//
// Reads every blob of `file` into a name -> Weights map. Each blob is stored as
// kFLOAT with `size` 32-bit elements in a malloc'ed buffer; the caller owns the
// buffers and is responsible for free()-ing `values` of every entry.
// Asserts if the file cannot be opened or the blob count is not positive.
std::map<std::string, Weights> loadWeights(const std::string file) {
    std::cout << "Loading weights: " << file << std::endl;
    std::map<std::string, Weights> weightMap;

    // Open weights file
    std::ifstream input(file);
    assert(input.is_open() && "Unable to load weight file.");

    // Read number of weight blobs
    int32_t count;
    input >> count;
    assert(count > 0 && "Invalid weight map file.");

    while (count--)
    {
        Weights wt{DataType::kFLOAT, nullptr, 0};
        uint32_t size;

        // Read name and element count of blob
        std::string name;
        input >> name >> std::dec >> size;
        wt.type = DataType::kFLOAT;

        // Load blob. Allocate by element size: the previous sizeof(val) was the
        // size of the pointer, which over-allocated on 64-bit targets.
        uint32_t* val = reinterpret_cast<uint32_t*>(malloc(sizeof(uint32_t) * size));
        assert((val != nullptr || size == 0) && "Out of memory while loading weights.");
        for (uint32_t x = 0, y = size; x < y; ++x)
        {
            // Each element is the raw 32-bit pattern of a float, written in hex
            input >> std::hex >> val[x];
        }
        wt.values = val;

        wt.count = size;
        weightMap[name] = wt;
    }

    return weightMap;
}

// Adds an inference-time batch norm as one per-channel scale layer:
//   y = scale * x + shift, with
//   scale = gamma / sqrt(var + eps), shift = beta - mean * gamma / sqrt(var + eps), power = 1.
// Reads "<lname>_gamma", "_beta", "_moving_mean" and "_moving_var" from weightMap.
// The computed buffers are stored back under "<lname>.scale/.shift/.power" so
// they live in weightMap alongside the loaded weights.
IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, std::string lname, float eps) {
    const float* bnGamma = static_cast<const float*>(weightMap[lname + "_gamma"].values);
    const float* bnBeta = static_cast<const float*>(weightMap[lname + "_beta"].values);
    const float* runMean = static_cast<const float*>(weightMap[lname + "_moving_mean"].values);
    const float* runVar = static_cast<const float*>(weightMap[lname + "_moving_var"].values);
    // Channel count is taken from the variance blob
    const int channels = weightMap[lname + "_moving_var"].count;

    float* scaleVals = static_cast<float*>(malloc(sizeof(float) * channels));
    float* shiftVals = static_cast<float*>(malloc(sizeof(float) * channels));
    float* powerVals = static_cast<float*>(malloc(sizeof(float) * channels));
    for (int c = 0; c < channels; ++c) {
        scaleVals[c] = bnGamma[c] / sqrt(runVar[c] + eps);
        shiftVals[c] = bnBeta[c] - runMean[c] * bnGamma[c] / sqrt(runVar[c] + eps);
        powerVals[c] = 1.0;
    }

    Weights scale{DataType::kFLOAT, scaleVals, channels};
    Weights shift{DataType::kFLOAT, shiftVals, channels};
    Weights power{DataType::kFLOAT, powerVals, channels};

    weightMap[lname + ".scale"] = scale;
    weightMap[lname + ".shift"] = shift;
    weightMap[lname + ".power"] = power;

    IScaleLayer* bnLayer = network->addScale(input, ScaleMode::kCHANNEL, shift, scale, power);
    assert(bnLayer);
    return bnLayer;
}

// Builds a per-channel PReLU out of basic layers:
//   out = relu(x) + (-gamma) * relu(-x)
// which equals x for x >= 0 and gamma * x for x < 0.
// The slope is read from weightMap["<lname>_gamma"]. Returns the final
// element-wise SUM layer.
//
// The helper buffers created here are registered in weightMap under
// "<lname>.prelu_*" keys, the same way addBatchNorm2d registers its buffers,
// so they are released when the caller frees every weightMap entry. Before this
// they were never stored anywhere and leaked.
ILayer* addPRelu(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, std::string lname) {
    float *gamma = (float*)weightMap[lname + "_gamma"].values;
    int len = weightMap[lname + "_gamma"].count;

    // scale_1 negates the input, scale_2 applies the negated slope
    float *scval_1 = reinterpret_cast<float*>(malloc(sizeof(float) * len));
    float *scval_2 = reinterpret_cast<float*>(malloc(sizeof(float) * len));
    for (int i = 0; i < len; i++) {
        scval_1[i] = -1.0;
        scval_2[i] = -gamma[i];
    }
    Weights scale_1{ DataType::kFLOAT, scval_1, len };
    Weights scale_2{ DataType::kFLOAT, scval_2, len };

    // Zero shift and unit power, shared by both scale layers
    float *shval = reinterpret_cast<float*>(malloc(sizeof(float) * len));
    for (int i = 0; i < len; i++) {
        shval[i] = 0.0;
    }
    Weights shift{ DataType::kFLOAT, shval, len };

    float *pval = reinterpret_cast<float*>(malloc(sizeof(float) * len));
    for (int i = 0; i < len; i++) {
        pval[i] = 1.0;
    }
    Weights power{ DataType::kFLOAT, pval, len };

    // Hand ownership of the temporary buffers to weightMap
    weightMap[lname + ".prelu_neg"] = scale_1;
    weightMap[lname + ".prelu_slope"] = scale_2;
    weightMap[lname + ".prelu_shift"] = shift;
    weightMap[lname + ".prelu_power"] = power;

    auto relu1 = network->addActivation(input, ActivationType::kRELU);
    assert(relu1);
    IScaleLayer* scale1 = network->addScale(input, ScaleMode::kCHANNEL, shift, scale_1, power);
    assert(scale1);
    auto relu2 = network->addActivation(*scale1->getOutput(0), ActivationType::kRELU);
    assert(relu2);
    IScaleLayer* scale2 = network->addScale(*relu2->getOutput(0), ScaleMode::kCHANNEL, shift, scale_2, power);
    assert(scale2);
    IElementWiseLayer* ew1 = network->addElementWise(*relu1->getOutput(0), *scale2->getOutput(0), ElementWiseOperation::kSUM);
    assert(ew1);
    return ew1;
}

// Convolution (no bias) -> batch norm (eps 1e-3) -> PReLU.
// Weights are looked up as "<lname>_conv2d_weight", "<lname>_batchnorm_*" and
// "<lname>_relu_gamma". k/p/s are the square kernel size, padding and stride.
ILayer* conv_bn_relu(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, std::string lname, int oup, int k = 3, int p = 1, int s = 2, int groups=1) {
    const Weights noBias{DataType::kFLOAT, nullptr, 0};

    IConvolutionLayer* conv = network->addConvolutionNd(input, oup, DimsHW{k, k}, weightMap[lname + "_conv2d_weight"], noBias);
    assert(conv);
    conv->setStrideNd(DimsHW{s, s});
    conv->setPaddingNd(DimsHW{p, p});
    conv->setNbGroups(groups);

    IScaleLayer* norm = addBatchNorm2d(network, weightMap, *conv->getOutput(0), lname + "_batchnorm", 1e-3);
    assert(norm);

    ILayer* activation = addPRelu(network, weightMap, *norm->getOutput(0), lname + "_relu");
    assert(activation);
    return activation;
}

// Convolution (no bias) -> batch norm (eps 1e-3), without an activation.
// Weights are looked up as "<lname>_conv2d_weight" and "<lname>_batchnorm_*".
// k/p/s are the square kernel size, padding and stride.
ILayer* conv_bn(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, std::string lname, int oup, int k = 3, int p = 1, int s = 1, int groups=1) {
    const Weights noBias{DataType::kFLOAT, nullptr, 0};

    IConvolutionLayer* conv = network->addConvolutionNd(input, oup, DimsHW{k, k}, weightMap[lname + "_conv2d_weight"], noBias);
    assert(conv);
    conv->setStrideNd(DimsHW{s, s});
    conv->setPaddingNd(DimsHW{p, p});
    conv->setNbGroups(groups);

    IScaleLayer* norm = addBatchNorm2d(network, weightMap, *conv->getOutput(0), lname + "_batchnorm", 1e-3);
    assert(norm);
    return norm;
}

// Three-stage block:
//   1. "_conv_sep":  1x1 conv to `groups` channels, BN, PReLU
//   2. "_conv_dw":   3x3 conv with `groups` groups and stride `s`, BN, PReLU
//   3. "_conv_proj": 1x1 conv to `oup` channels, BN (no activation)
// All convolutions are bias-free and all batch norms use eps 1e-3.
// `inp` is accepted for signature compatibility but is not used.
ILayer* DepthWise(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, std::string lname, int inp, int oup, int groups, int s) {
    const Weights noBias{DataType::kFLOAT, nullptr, 0};

    // Adds one bias-free square convolution whose weights are stored under lname + weightSuffix.
    auto addConv = [&](ITensor& in, int outChannels, int kernel, int stride, int pad, int nbGroups,
                       const char* weightSuffix) -> IConvolutionLayer* {
        IConvolutionLayer* conv = network->addConvolutionNd(in, outChannels, DimsHW{kernel, kernel}, weightMap[lname + weightSuffix], noBias);
        assert(conv);
        conv->setStrideNd(DimsHW{stride, stride});
        conv->setPaddingNd(DimsHW{pad, pad});
        conv->setNbGroups(nbGroups);
        return conv;
    };

    // Stage 1: pointwise expansion
    IConvolutionLayer* expand = addConv(input, groups, 1, 1, 0, 1, "_conv_sep_conv2d_weight");
    IScaleLayer* expandBn = addBatchNorm2d(network, weightMap, *expand->getOutput(0), lname + "_conv_sep_batchnorm", 1e-3);
    assert(expandBn);
    ILayer* expandAct = addPRelu(network, weightMap, *expandBn->getOutput(0), lname + "_conv_sep_relu");
    assert(expandAct);

    // Stage 2: grouped 3x3 convolution carrying the stride
    IConvolutionLayer* depthwise = addConv(*expandAct->getOutput(0), groups, 3, s, 1, groups, "_conv_dw_conv2d_weight");
    IScaleLayer* depthwiseBn = addBatchNorm2d(network, weightMap, *depthwise->getOutput(0), lname + "_conv_dw_batchnorm", 1e-3);
    assert(depthwiseBn);
    ILayer* depthwiseAct = addPRelu(network, weightMap, *depthwiseBn->getOutput(0), lname + "_conv_dw_relu");
    assert(depthwiseAct);

    // Stage 3: pointwise projection, no activation
    IConvolutionLayer* project = addConv(*depthwiseAct->getOutput(0), oup, 1, 1, 0, 1, "_conv_proj_conv2d_weight");
    IScaleLayer* projectBn = addBatchNorm2d(network, weightMap, *project->getOutput(0), lname + "_conv_proj_batchnorm", 1e-3);
    assert(projectBn);
    return projectBn;
}


// Residual wrapper around DepthWise: returns input + DepthWise(input) as an
// element-wise SUM layer. Parameters are forwarded to DepthWise unchanged.
ILayer* DWResidual(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, std::string lname, int inp, int oup, int groups, int s) {
    ILayer* branch = DepthWise(network, weightMap, input, lname, inp, oup, groups, s);
    IElementWiseLayer* sum = network->addElementWise(input, *branch->getOutput(0), ElementWiseOperation::kSUM);
    assert(sum);
    return sum;
}


// Create the engine using only the API and not any parser.
//
// Loads "../arcface-mobilefacenet.wts", defines the network layer by layer,
// builds it with `builder`/`config` and returns the resulting engine, or nullptr
// if the build failed. The network definition and all host weight buffers
// are released before returning.
//   maxBatchSize - passed to IBuilder::setMaxBatchSize
//   dt           - data type of the input tensor
ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt) {
    INetworkDefinition* network = builder->createNetworkV2(0U);

    // Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
    ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{3, INPUT_H, INPUT_W});
    assert(data);

    std::map<std::string, Weights> weightMap = loadWeights("../arcface-mobilefacenet.wts");

    auto conv_1 = conv_bn_relu(network, weightMap, *data, "conv_1", 64, 3, 1, 2);
    auto conv_2_dw = conv_bn_relu(network, weightMap, *conv_1->getOutput(0), "conv_2_dw", 64, 3, 1, 1, 64);
    auto conv_23 = DepthWise(network, weightMap, *conv_2_dw->getOutput(0), "dconv_23", 64, 64, 128, 2);
    auto res_3_block0 = DWResidual(network, weightMap, *conv_23->getOutput(0), "res_3_block0", 64, 64, 128, 1);
    auto res_3_block1 = DWResidual(network, weightMap, *res_3_block0->getOutput(0), "res_3_block1", 64, 64, 128, 1);
    auto res_3_block2 = DWResidual(network, weightMap, *res_3_block1->getOutput(0), "res_3_block2", 64, 64, 128, 1);
    auto res_3_block3 = DWResidual(network, weightMap, *res_3_block2->getOutput(0), "res_3_block3", 64, 64, 128, 1);
    auto conv_34 = DepthWise(network, weightMap, *res_3_block3->getOutput(0), "dconv_34", 64, 128, 256, 2);
    auto res_4_block0 = DWResidual(network, weightMap, *conv_34->getOutput(0), "res_4_block0", 128, 128, 256, 1);
    auto res_4_block1 = DWResidual(network, weightMap, *res_4_block0->getOutput(0), "res_4_block1", 128, 128, 256, 1);
    auto res_4_block2 = DWResidual(network, weightMap, *res_4_block1->getOutput(0), "res_4_block2", 128, 128, 256, 1);
    auto res_4_block3 = DWResidual(network, weightMap, *res_4_block2->getOutput(0), "res_4_block3", 128, 128, 256, 1);
    auto res_4_block4 = DWResidual(network, weightMap, *res_4_block3->getOutput(0), "res_4_block4", 128, 128, 256, 1);
    auto res_4_block5 = DWResidual(network, weightMap, *res_4_block4->getOutput(0), "res_4_block5", 128, 128, 256, 1);
    auto conv_45 = DepthWise(network, weightMap, *res_4_block5->getOutput(0), "dconv_45", 128, 128, 512, 2);
    auto res_5_block0 = DWResidual(network, weightMap, *conv_45->getOutput(0), "res_5_block0", 128, 128, 256, 1);
    auto res_5_block1 = DWResidual(network, weightMap, *res_5_block0->getOutput(0), "res_5_block1", 128, 128, 256, 1);
    auto conv_6_sep = conv_bn_relu(network, weightMap, *res_5_block1->getOutput(0), "conv_6sep", 512, 1, 0, 1);
    auto conv_6dw7_7 = conv_bn(network, weightMap, *conv_6_sep->getOutput(0), "conv_6dw7_7", 512, 7, 0, 1, 512);
    // NOTE(review): the weight key is "fc1_weight" while the bias key is "pre_fc1_bias";
    // presumably this mirrors the names in the exported .wts file -- confirm against gen_wts.py.
    IFullyConnectedLayer* fc1 = network->addFullyConnected(*conv_6dw7_7->getOutput(0), 128, weightMap["fc1_weight"], weightMap["pre_fc1_bias"]);
    assert(fc1);
    auto bn1 = addBatchNorm2d(network, weightMap, *fc1->getOutput(0), "fc1", 2e-5);
    assert(bn1);
    bn1->getOutput(0)->setName(OUTPUT_BLOB_NAME);
    network->markOutput(*bn1->getOutput(0));

    // Build engine
    builder->setMaxBatchSize(maxBatchSize);
    config->setMaxWorkspaceSize(16 * (1 << 20));  // 16MB
#ifdef USE_FP16
    config->setFlag(BuilderFlag::kFP16);
#endif
    std::cout << "Building engine, please wait for a while..." << std::endl;
    ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    // Only report success when the builder actually returned an engine
    if (engine != nullptr) {
        std::cout << "Build engine successfully!" << std::endl;
    } else {
        std::cerr << "Build engine failed!" << std::endl;
    }

    // Don't need the network any more
    network->destroy();

    // Release host memory
    for (auto& mem : weightMap)
    {
        free((void*) (mem.second.values));
    }

    return engine;
}

// Builds the engine through createEngine() and serializes it.
//   maxBatchSize - forwarded to createEngine
//   modelStream  - out parameter; receives the serialized engine. The caller
//                  owns the returned IHostMemory and must destroy() it.
// Asserts if the engine could not be built.
void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream) {
    // Create builder
    IBuilder* builder = createInferBuilder(gLogger);
    IBuilderConfig* config = builder->createBuilderConfig();

    // Create model to populate the network, then set the outputs and create an engine
    ICudaEngine* engine = createEngine(maxBatchSize, builder, config, DataType::kFLOAT);
    assert(engine != nullptr);

    // Serialize the engine
    (*modelStream) = engine->serialize();

    // Close everything down. The builder config was previously never released.
    engine->destroy();
    config->destroy();
    builder->destroy();
}

// Runs one inference on `context`.
//   input     - host buffer holding batchSize * 3 * INPUT_H * INPUT_W floats
//   output    - host buffer receiving batchSize * OUTPUT_SIZE floats
//   batchSize - number of images in `input`
// Device buffers and the CUDA stream are created and released on every call.
// Any CUDA error, or a failed enqueue, aborts the process.
void doInference(IExecutionContext& context, float* input, float* output, int batchSize) {
    const ICudaEngine& engine = context.getEngine();

    // Pointers to input and output device buffers to pass to engine.
    // Engine requires exactly IEngine::getNbBindings() number of buffers.
    assert(engine.getNbBindings() == 2);
    void* buffers[2];

    // In order to bind the buffers, we need to know the names of the input and output tensors.
    // Note that indices are guaranteed to be less than IEngine::getNbBindings()
    const int inputIndex = engine.getBindingIndex(INPUT_BLOB_NAME);
    const int outputIndex = engine.getBindingIndex(OUTPUT_BLOB_NAME);

    // Create GPU buffers on device
    CHECK(cudaMalloc(&buffers[inputIndex], batchSize * 3 * INPUT_H * INPUT_W * sizeof(float)));
    CHECK(cudaMalloc(&buffers[outputIndex], batchSize * OUTPUT_SIZE * sizeof(float)));

    // Create stream
    cudaStream_t stream;
    CHECK(cudaStreamCreate(&stream));

    // DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host
    CHECK(cudaMemcpyAsync(buffers[inputIndex], input, batchSize * 3 * INPUT_H * INPUT_W * sizeof(float), cudaMemcpyHostToDevice, stream));
    if (!context.enqueue(batchSize, buffers, stream, nullptr)) {
        // Without this check a failed launch would silently leave `output` unchanged
        std::cerr << "TensorRT failure: enqueue returned false" << std::endl;
        abort();
    }
    CHECK(cudaMemcpyAsync(output, buffers[outputIndex], batchSize * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream));
    // Errors from the asynchronous work above are reported by the synchronize call
    CHECK(cudaStreamSynchronize(stream));

    // Release stream and buffers
    CHECK(cudaStreamDestroy(stream));
    CHECK(cudaFree(buffers[inputIndex]));
    CHECK(cudaFree(buffers[outputIndex]));
}

// Appends the name of every entry of directory `p_dir_name` to `file_names`,
// skipping "." and "..". Only the bare entry name is stored, not the full path.
// Returns 0 on success, or -1 if the directory cannot be opened.
int read_files_in_dir(const char *p_dir_name, std::vector<std::string> &file_names) {
    DIR* dir_handle = opendir(p_dir_name);
    if (!dir_handle) {
        return -1;
    }

    for (struct dirent* entry = readdir(dir_handle); entry != nullptr; entry = readdir(dir_handle)) {
        const std::string entry_name(entry->d_name);
        if (entry_name == "." || entry_name == "..") {
            continue;
        }
        file_names.push_back(entry_name);
    }

    closedir(dir_handle);
    return 0;
}

int main(int argc, char** argv) {
    cudaSetDevice(DEVICE);
    // create a model using the API directly and serialize it to a stream
    char *trtModelStream{nullptr};
    size_t size{0};

    if (argc == 2 && std::string(argv[1]) == "-s") {
        IHostMemory* modelStream{nullptr};
        APIToModel(BATCH_SIZE, &modelStream);
        assert(modelStream != nullptr);
        std::ofstream p("arcface-mobilefacenet.engine", std::ios::binary);
        if (!p) {
            std::cerr << "could not open plan output file" << std::endl;
            return -1;
        }
        p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
        modelStream->destroy();
        return 0;
    } else if (argc == 2 && std::string(argv[1]) == "-d") {
        std::ifstream file("arcface-mobilefacenet.engine", std::ios::binary);
        if (file.good()) {
            file.seekg(0, file.end);
            size = file.tellg();
            file.seekg(0, file.beg);
            trtModelStream = new char[size];
            assert(trtModelStream);
            file.read(trtModelStream, size);
            file.close();
        }
    } else {
        std::cerr << "arguments not right!" << std::endl;
        std::cerr << "./arcface-mobilefacenet -s  // serialize model to plan file" << std::endl;
        std::cerr << "./arcface-mobilefacenet -d  // deserialize plan file and run inference" << std::endl;
        return -1;
    }

    // prepare input data ---------------------------
    static float data[BATCH_SIZE * 3 * INPUT_H * INPUT_W];
    //for (int i = 0; i < 3 * INPUT_H * INPUT_W; i++)
    //    data[i] = 1.0;
    static float prob[BATCH_SIZE * OUTPUT_SIZE];
    IRuntime* runtime = createInferRuntime(gLogger);
    assert(runtime != nullptr);
    ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size);
    assert(engine != nullptr);
    IExecutionContext* context = engine->createExecutionContext();
    assert(context != nullptr);
    delete[] trtModelStream;

    cv::Mat img = cv::imread("../joey0.ppm");
    for (int i = 0; i < INPUT_H * INPUT_W; i++) {
        data[i] = ((float)img.at<cv::Vec3b>(i)[2] - 127.5) * 0.0078125;
        data[i + INPUT_H * INPUT_W] = ((float)img.at<cv::Vec3b>(i)[1] - 127.5) * 0.0078125;
        data[i + 2 * INPUT_H * INPUT_W] = ((float)img.at<cv::Vec3b>(i)[0] - 127.5) * 0.0078125;
    }

    // Run inference
    auto start = std::chrono::system_clock::now();
    doInference(*context, data, prob, BATCH_SIZE);
    auto end = std::chrono::system_clock::now();
    std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;

    cv::Mat out(128, 1, CV_32FC1, prob);
    cv::Mat out_norm;
    cv::normalize(out, out_norm);

    img = cv::imread("../joey1.ppm");
    for (int i = 0; i < INPUT_H * INPUT_W; i++) {
        data[i] = ((float)img.at<cv::Vec3b>(i)[2] - 127.5) * 0.0078125;
        data[i + INPUT_H * INPUT_W] = ((float)img.at<cv::Vec3b>(i)[1] - 127.5) * 0.0078125;
        data[i + 2 * INPUT_H * INPUT_W] = ((float)img.at<cv::Vec3b>(i)[0] - 127.5) * 0.0078125;
    }

    // Run inference
    start = std::chrono::system_clock::now();
    doInference(*context, data, prob, BATCH_SIZE);
    end = std::chrono::system_clock::now();
    std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;

    cv::Mat out1(1, 128, CV_32FC1, prob);
    cv::Mat out_norm1;
    cv::normalize(out1, out_norm1);

    cv::Mat res = out_norm1 * out_norm;

    std::cout << "similarity score: " << *(float*)res.data << std::endl;

    // Destroy the engine
    context->destroy();
    engine->destroy();
    runtime->destroy();

    //Print histogram of the output distribution
    //std::cout << "\nOutput:\n\n";
    //for (unsigned int i = 0; i < OUTPUT_SIZE; i++)
    //{
    //    std::cout << p_out_norm[i] << ", ";
    //    if (i % 10 == 0) std::cout << i / 10 << std::endl;
    //}
    //std::cout << std::endl;

    return 0;
}


================================================
FILE: arcface/arcface-r100.cpp
================================================
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <vector>
#include <chrono>
#include <opencv2/opencv.hpp>
#include <dirent.h>
#include "NvInfer.h"
#include "cuda_runtime_api.h"
#include "logging.h"

// Evaluates `status` exactly once. If the result is non-zero, prints it to
// stderr prefixed with "Cuda failure: " and terminates the process via abort().
// Wrapped in do { } while (0) so it behaves as a single statement.
#define CHECK(status) \
    do\
    {\
        auto ret = (status);\
        if (ret != 0)\
        {\
            std::cerr << "Cuda failure: " << ret << std::endl;\
            abort();\
        }\
    } while (0)

//#define USE_FP16  // uncomment to set BuilderFlag::kFP16 in createEngine(); leave commented out for FP32
#define DEVICE 0  // GPU id
#define BATCH_SIZE 1  // currently, only support BATCH=1

using namespace nvinfer1;

// stuff we know about the network and the input/output blobs
static const int INPUT_H = 112;   // input height; the network input is declared as {3, INPUT_H, INPUT_W}
static const int INPUT_W = 112;   // input width
static const int OUTPUT_SIZE = 512;  // number of floats produced per image (size of the output buffer)
const char* INPUT_BLOB_NAME = "data";   // name given to the network input tensor and used to look up its binding
const char* OUTPUT_BLOB_NAME = "prob";  // name given to the network output tensor and used to look up its binding
static Logger gLogger;  // logger handed to createInferBuilder()

// TensorRT weight files (.wts) have a simple whitespace delimited format:
//   <number of blobs>
//   [name] [size] <data x size in hex>
//
// Reads every blob of `file` into a name -> Weights map. Each blob is stored as
// kFLOAT with `size` 32-bit elements in a malloc'ed buffer; the caller owns the
// buffers and is responsible for free()-ing `values` of every entry.
// Asserts if the file cannot be opened or the blob count is not positive.
std::map<std::string, Weights> loadWeights(const std::string file) {
    std::cout << "Loading weights: " << file << std::endl;
    std::map<std::string, Weights> weightMap;

    // Open weights file
    std::ifstream input(file);
    assert(input.is_open() && "Unable to load weight file.");

    // Read number of weight blobs
    int32_t count;
    input >> count;
    assert(count > 0 && "Invalid weight map file.");

    while (count--)
    {
        Weights wt{DataType::kFLOAT, nullptr, 0};
        uint32_t size;

        // Read name and element count of blob
        std::string name;
        input >> name >> std::dec >> size;
        wt.type = DataType::kFLOAT;

        // Load blob. Allocate by element size: the previous sizeof(val) was the
        // size of the pointer, which over-allocated on 64-bit targets.
        uint32_t* val = reinterpret_cast<uint32_t*>(malloc(sizeof(uint32_t) * size));
        assert((val != nullptr || size == 0) && "Out of memory while loading weights.");
        for (uint32_t x = 0, y = size; x < y; ++x)
        {
            // Each element is the raw 32-bit pattern of a float, written in hex
            input >> std::hex >> val[x];
        }
        wt.values = val;

        wt.count = size;
        weightMap[name] = wt;
    }

    return weightMap;
}

// Adds an inference-time batch norm as one per-channel scale layer:
//   y = scale * x + shift, with
//   scale = gamma / sqrt(var + eps), shift = beta - mean * gamma / sqrt(var + eps), power = 1.
// Reads "<lname>_gamma", "_beta", "_moving_mean" and "_moving_var" from weightMap.
// The computed buffers are stored back under "<lname>.scale/.shift/.power" so
// they live in weightMap alongside the loaded weights.
IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, std::string lname, float eps) {
    const float* bnGamma = static_cast<const float*>(weightMap[lname + "_gamma"].values);
    const float* bnBeta = static_cast<const float*>(weightMap[lname + "_beta"].values);
    const float* runMean = static_cast<const float*>(weightMap[lname + "_moving_mean"].values);
    const float* runVar = static_cast<const float*>(weightMap[lname + "_moving_var"].values);
    // Channel count is taken from the variance blob
    const int channels = weightMap[lname + "_moving_var"].count;

    float* scaleVals = static_cast<float*>(malloc(sizeof(float) * channels));
    float* shiftVals = static_cast<float*>(malloc(sizeof(float) * channels));
    float* powerVals = static_cast<float*>(malloc(sizeof(float) * channels));
    for (int c = 0; c < channels; ++c) {
        scaleVals[c] = bnGamma[c] / sqrt(runVar[c] + eps);
        shiftVals[c] = bnBeta[c] - runMean[c] * bnGamma[c] / sqrt(runVar[c] + eps);
        powerVals[c] = 1.0;
    }

    Weights scale{DataType::kFLOAT, scaleVals, channels};
    Weights shift{DataType::kFLOAT, shiftVals, channels};
    Weights power{DataType::kFLOAT, powerVals, channels};

    weightMap[lname + ".scale"] = scale;
    weightMap[lname + ".shift"] = shift;
    weightMap[lname + ".power"] = power;

    IScaleLayer* bnLayer = network->addScale(input, ScaleMode::kCHANNEL, shift, scale, power);
    assert(bnLayer);
    return bnLayer;
}

// Builds a per-channel PReLU out of basic layers:
//   out = relu(x) + (-gamma) * relu(-x)
// which equals x for x >= 0 and gamma * x for x < 0.
// The slope is read from weightMap["<lname>_gamma"]. Returns the final
// element-wise SUM layer.
//
// The helper buffers created here are registered in weightMap under
// "<lname>.prelu_*" keys, the same way addBatchNorm2d registers its buffers,
// so they are released when the caller frees every weightMap entry. Before this
// they were never stored anywhere and leaked.
ILayer* addPRelu(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, std::string lname) {
    float *gamma = (float*)weightMap[lname + "_gamma"].values;
    int len = weightMap[lname + "_gamma"].count;

    // scale_1 negates the input, scale_2 applies the negated slope
    float *scval_1 = reinterpret_cast<float*>(malloc(sizeof(float) * len));
    float *scval_2 = reinterpret_cast<float*>(malloc(sizeof(float) * len));
    for (int i = 0; i < len; i++) {
        scval_1[i] = -1.0;
        scval_2[i] = -gamma[i];
    }
    Weights scale_1{ DataType::kFLOAT, scval_1, len };
    Weights scale_2{ DataType::kFLOAT, scval_2, len };

    // Zero shift and unit power, shared by both scale layers
    float *shval = reinterpret_cast<float*>(malloc(sizeof(float) * len));
    for (int i = 0; i < len; i++) {
        shval[i] = 0.0;
    }
    Weights shift{ DataType::kFLOAT, shval, len };

    float *pval = reinterpret_cast<float*>(malloc(sizeof(float) * len));
    for (int i = 0; i < len; i++) {
        pval[i] = 1.0;
    }
    Weights power{ DataType::kFLOAT, pval, len };

    // Hand ownership of the temporary buffers to weightMap
    weightMap[lname + ".prelu_neg"] = scale_1;
    weightMap[lname + ".prelu_slope"] = scale_2;
    weightMap[lname + ".prelu_shift"] = shift;
    weightMap[lname + ".prelu_power"] = power;

    auto relu1 = network->addActivation(input, ActivationType::kRELU);
    assert(relu1);
    IScaleLayer* scale1 = network->addScale(input, ScaleMode::kCHANNEL, shift, scale_1, power);
    assert(scale1);
    auto relu2 = network->addActivation(*scale1->getOutput(0), ActivationType::kRELU);
    assert(relu2);
    IScaleLayer* scale2 = network->addScale(*relu2->getOutput(0), ScaleMode::kCHANNEL, shift, scale_2, power);
    assert(scale2);
    IElementWiseLayer* ew1 = network->addElementWise(*relu1->getOutput(0), *scale2->getOutput(0), ElementWiseOperation::kSUM);
    assert(ew1);
    return ew1;
}

// One residual unit:
//   main path: BN -> 3x3 conv -> BN -> PReLU -> 3x3 conv (stride s) -> BN
//   shortcut:  the input itself when dim_match is true, otherwise
//              1x1 conv (stride s) -> BN
// Returns the element-wise SUM of shortcut and main path. All convolutions are
// bias-free and all batch norms use eps 2e-5; weights are looked up with the
// `lname` prefix.
ILayer* resUnit(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int num_filters, int s, bool dim_match, std::string lname) {
    const Weights noBias{DataType::kFLOAT, nullptr, 0};

    // Main path
    IScaleLayer* preNorm = addBatchNorm2d(network, weightMap, input, lname + "_bn1", 2e-5);
    IConvolutionLayer* firstConv = network->addConvolutionNd(*preNorm->getOutput(0), num_filters, DimsHW{3, 3}, weightMap[lname + "_conv1_weight"], noBias);
    assert(firstConv);
    firstConv->setPaddingNd(DimsHW{1, 1});
    IScaleLayer* midNorm = addBatchNorm2d(network, weightMap, *firstConv->getOutput(0), lname + "_bn2", 2e-5);
    ILayer* midAct = addPRelu(network, weightMap, *midNorm->getOutput(0), lname + "_relu1");
    IConvolutionLayer* secondConv = network->addConvolutionNd(*midAct->getOutput(0), num_filters, DimsHW{3, 3}, weightMap[lname + "_conv2_weight"], noBias);
    assert(secondConv);
    secondConv->setStrideNd(DimsHW{s, s});
    secondConv->setPaddingNd(DimsHW{1, 1});
    IScaleLayer* postNorm = addBatchNorm2d(network, weightMap, *secondConv->getOutput(0), lname + "_bn3", 2e-5);

    // Shortcut: identity unless the shape changes, in which case project it
    ITensor* shortcut = &input;
    if (!dim_match) {
        IConvolutionLayer* projConv = network->addConvolutionNd(input, num_filters, DimsHW{1, 1}, weightMap[lname + "_conv1sc_weight"], noBias);
        assert(projConv);
        projConv->setStrideNd(DimsHW{s, s});
        IScaleLayer* projNorm = addBatchNorm2d(network, weightMap, *projConv->getOutput(0), lname + "_sc", 2e-5);
        shortcut = projNorm->getOutput(0);
    }

    IElementWiseLayer* sum = network->addElementWise(*shortcut, *postNorm->getOutput(0), ElementWiseOperation::kSUM);
    assert(sum);
    return sum;
}

// Create the engine using only the API and not any parser.
//
// Loads "../arcface-r100.wts", defines the network layer by layer, builds it
// with `builder`/`config` and returns the resulting engine, or nullptr if the
// build failed. The network definition and all host weight buffers are
// released before returning.
//   maxBatchSize - passed to IBuilder::setMaxBatchSize
//   dt           - data type of the input tensor
ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt) {
    INetworkDefinition* network = builder->createNetworkV2(0U);

    // Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
    ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{3, INPUT_H, INPUT_W});
    assert(data);

    std::map<std::string, Weights> weightMap = loadWeights("../arcface-r100.wts");
    Weights emptywts{DataType::kFLOAT, nullptr, 0};

    IConvolutionLayer* conv0 = network->addConvolutionNd(*data, 64, DimsHW{3, 3}, weightMap["conv0_weight"], emptywts);
    assert(conv0);
    conv0->setPaddingNd(DimsHW{1, 1});
    auto bn0 = addBatchNorm2d(network, weightMap, *conv0->getOutput(0), "bn0", 2e-5);
    auto relu0 = addPRelu(network, weightMap, *bn0->getOutput(0), "relu0");

    // Four stages of residual units named "stage<S>_unit<U>" (both 1-based).
    // The first unit of every stage uses stride 2 with a projected shortcut
    // (dim_match = false); the remaining units use stride 1 with an identity
    // shortcut. Units are created in the same order as the original unrolled code.
    const int kNumStages = 4;
    const int unitsPerStage[kNumStages] = {3, 13, 30, 3};
    const int filtersPerStage[kNumStages] = {64, 128, 256, 512};
    ILayer* body = relu0;
    for (int stage = 0; stage < kNumStages; ++stage) {
        for (int unit = 1; unit <= unitsPerStage[stage]; ++unit) {
            std::ostringstream unitName;
            unitName << "stage" << (stage + 1) << "_unit" << unit;
            const bool firstUnit = (unit == 1);
            body = resUnit(network, weightMap, *body->getOutput(0), filtersPerStage[stage],
                           firstUnit ? 2 : 1, !firstUnit, unitName.str());
        }
    }

    auto bn1 = addBatchNorm2d(network, weightMap, *body->getOutput(0), "bn1", 2e-5);
    IFullyConnectedLayer* fc1 = network->addFullyConnected(*bn1->getOutput(0), 512, weightMap["pre_fc1_weight"], weightMap["pre_fc1_bias"]);
    assert(fc1);
    auto bn2 = addBatchNorm2d(network, weightMap, *fc1->getOutput(0), "fc1", 2e-5);

    bn2->getOutput(0)->setName(OUTPUT_BLOB_NAME);
    network->markOutput(*bn2->getOutput(0));

    // Build engine
    builder->setMaxBatchSize(maxBatchSize);
    config->setMaxWorkspaceSize(16 * (1 << 20));  // 16MB
#ifdef USE_FP16
    config->setFlag(BuilderFlag::kFP16);
#endif
    std::cout << "Building engine, please wait for a while..." << std::endl;
    ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    // Only report success when the builder actually returned an engine
    if (engine != nullptr) {
        std::cout << "Build engine successfully!" << std::endl;
    } else {
        std::cerr << "Build engine failed!" << std::endl;
    }

    // Don't need the network any more
    network->destroy();

    // Release host memory
    for (auto& mem : weightMap)
    {
        free((void*) (mem.second.values));
    }

    return engine;
}

// Builds the network through the TensorRT API and serializes the resulting engine.
//
// maxBatchSize: forwarded unchanged to createEngine().
// modelStream:  out-parameter; on return *modelStream holds the serialized
//               engine. The caller is responsible for calling destroy() on it.
void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream) {
    // Create builder and its configuration object
    IBuilder* builder = createInferBuilder(gLogger);
    assert(builder != nullptr);
    IBuilderConfig* config = builder->createBuilderConfig();
    assert(config != nullptr);

    // Create model to populate the network, then set the outputs and create an engine
    ICudaEngine* engine = createEngine(maxBatchSize, builder, config, DataType::kFLOAT);
    assert(engine != nullptr);

    // Serialize the engine
    (*modelStream) = engine->serialize();

    // Close everything down. The config is a separate object created by the
    // builder, so it has to be released explicitly as well.
    engine->destroy();
    config->destroy();
    builder->destroy();
}

// Runs one synchronous inference pass.
//
// context:   execution context of an engine with exactly two bindings
//            (INPUT_BLOB_NAME and OUTPUT_BLOB_NAME).
// input:     host buffer holding batchSize * 3 * INPUT_H * INPUT_W floats.
// output:    host buffer receiving batchSize * OUTPUT_SIZE floats.
// batchSize: number of samples in this call.
//
// Device buffers and the CUDA stream are created and released on every call.
// Any CUDA or TensorRT failure aborts the process, matching the CHECK macro.
void doInference(IExecutionContext& context, float* input, float* output, int batchSize) {
    const ICudaEngine& engine = context.getEngine();

    // Pointers to input and output device buffers to pass to engine.
    // Engine requires exactly IEngine::getNbBindings() number of buffers.
    assert(engine.getNbBindings() == 2);
    void* buffers[2] = {nullptr, nullptr};

    // In order to bind the buffers, we need to know the names of the input and output tensors.
    const int inputIndex = engine.getBindingIndex(INPUT_BLOB_NAME);
    const int outputIndex = engine.getBindingIndex(OUTPUT_BLOB_NAME);
    // The indices are used to subscript buffers[], so reject anything out of range.
    assert(inputIndex >= 0 && inputIndex < 2);
    assert(outputIndex >= 0 && outputIndex < 2);

    const size_t inputBytes = static_cast<size_t>(batchSize) * 3 * INPUT_H * INPUT_W * sizeof(float);
    const size_t outputBytes = static_cast<size_t>(batchSize) * OUTPUT_SIZE * sizeof(float);

    // Create GPU buffers on device
    CHECK(cudaMalloc(&buffers[inputIndex], inputBytes));
    CHECK(cudaMalloc(&buffers[outputIndex], outputBytes));

    // Create stream
    cudaStream_t stream;
    CHECK(cudaStreamCreate(&stream));

    // DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host
    CHECK(cudaMemcpyAsync(buffers[inputIndex], input, inputBytes, cudaMemcpyHostToDevice, stream));
    if (!context.enqueue(batchSize, buffers, stream, nullptr)) {
        // Without this check a failed enqueue would silently return stale output.
        std::cerr << "TensorRT enqueue failed" << std::endl;
        abort();
    }
    CHECK(cudaMemcpyAsync(output, buffers[outputIndex], outputBytes, cudaMemcpyDeviceToHost, stream));
    // Errors from the asynchronous work above surface here.
    CHECK(cudaStreamSynchronize(stream));

    // Release stream and buffers
    CHECK(cudaStreamDestroy(stream));
    CHECK(cudaFree(buffers[inputIndex]));
    CHECK(cudaFree(buffers[outputIndex]));
}

// Appends the name of every entry in directory p_dir_name to file_names,
// skipping the "." and ".." entries. Only the bare entry name is stored; the
// directory path is not prepended.
//
// Returns 0 on success, or -1 if the directory cannot be opened (file_names
// is left untouched in that case).
int read_files_in_dir(const char *p_dir_name, std::vector<std::string> &file_names) {
    DIR *dir_handle = opendir(p_dir_name);
    if (!dir_handle) {
        return -1;
    }

    for (struct dirent* entry = readdir(dir_handle); entry != nullptr; entry = readdir(dir_handle)) {
        const std::string entry_name(entry->d_name);
        if (entry_name == "." || entry_name == "..") {
            continue;
        }
        file_names.push_back(entry_name);
    }

    closedir(dir_handle);
    return 0;
}

int main(int argc, char** argv) {
    cudaSetDevice(DEVICE);
    // create a model using the API directly and serialize it to a stream
    char *trtModelStream{nullptr};
    size_t size{0};

    if (argc == 2 && std::string(argv[1]) == "-s") {
        IHostMemory* modelStream{nullptr};
        APIToModel(256, &modelStream);
        assert(modelStream != nullptr);
        std::ofstream p("arcface-r100.engine", std::ios::binary);
        if (!p) {
            std::cerr << "could not open plan output file" << std::endl;
            return -1;
        }
        p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
        modelStream->destroy();
        return 0;
    } else if (argc == 2 && std::string(argv[1]) == "-d") {
        std::ifstream file("arcface-r100.engine", std::ios::binary);
        if (file.good()) {
            file.seekg(0, file.end);
            size = file.tellg();
            file.seekg(0, file.beg);
            trtModelStream = new char[size];
            assert(trtModelStream);
            file.read(trtModelStream, size);
            file.close();
        }
    } else {
        std::cerr << "arguments not right!" << std::endl;
        std::cerr << "./arcface-r100 -s  // serialize model to plan file" << std::endl;
        std::cerr << "./arcface-r100 -d  // deserialize plan file and run inference" << std::endl;
        return -1;
    }

    // prepare input data ---------------------------
    static float data[BATCH_SIZE * 3 * INPUT_H * INPUT_W];
    //for (int i = 0; i < 3 * INPUT_H * INPUT_W; i++)
    //    data[i] = 1.0;
    static float prob[BATCH_SIZE * OUTPUT_SIZE];
    IRuntime* runtime = createInferRuntime(gLogger);
    assert(runtime != nullptr);
    ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size);
    assert(engine != nullptr);
    IExecutionContext* context = engine->createExecutionContext();
    assert(context != nullptr);
    delete[] trtModelStream;

    cv::Mat img = cv::imread("../joey0.ppm");
    for (int i = 0; i < INPUT_H * INPUT_W; i++) {
        data[i] = ((float)img.at<cv::Vec3b>(i)[2] - 127.5) * 0.0078125;
        data[i + INPUT_H * INPUT_W] = ((float)img.at<cv::Vec3b>(i)[1] - 127.5) * 0.0078125;
        data[i + 2 * INPUT_H * INPUT_W] = ((float)img.at<cv::Vec3b>(i)[0] - 127.5) * 0.0078125;
    }

    // Run inference
    auto start = std::chrono::system_clock::now();
    doInference(*context, data, prob, BATCH_SIZE);
    auto end = std::chrono::system_clock::now();
    std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;

    cv::Mat out(512, 1, CV_32FC1, prob);
    cv::Mat out_norm;
    cv::normalize(out, out_norm);

    img = cv::imread("../joey1.ppm");
    for (int i = 0; i < INPUT_H * INPUT_W; i++) {
        data[i] = ((float)img.at<cv::Vec3b>(i)[2] - 127.5) * 0.0078125;
        data[i + INPUT_H * INPUT_W] = ((float)img.at<cv::Vec3b>(i)[1] - 127.5) * 0.0078125;
        data[i + 2 * INPUT_H * INPUT_W] = ((float)img.at<cv::Vec3b>(i)[0] - 127.5) * 0.0078125;
    }

    // Run inference
    start = std::chrono::system_clock::now();
    doInference(*context, data, prob, BATCH_SIZE);
    end = std::chrono::system_clock::now();
    std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;

    cv::Mat out1(1, 512, CV_32FC1, prob);
    cv::Mat out_norm1;
    cv::normalize(out1, out_norm1);

    cv::Mat res = out_norm1 * out_norm;

    std::cout << "similarity score: " << *(float*)res.data << std::endl;

    // Destroy the engine
    context->destroy();
    engine->destroy();
    runtime->destroy();

    //Print histogram of the output distribution
    //std::cout << "\nOutput:\n\n";
    //for (unsigned int i = 0; i < OUTPUT_SIZE; i++)
    //{
    //    std::cout << p_out_norm[i] << ", ";
    //    if (i % 10 == 0) std::cout << i / 10 << std::endl;
    //}
    //std::cout << std::endl;

    return 0;
}

================================================
FILE: arcface/arcface-r50.cpp
================================================
#include <chrono>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>
#include <dirent.h>
#include <opencv2/opencv.hpp>
#include "NvInfer.h"
#include "cuda_runtime_api.h"
#include "logging.h"

// Evaluates `status` exactly once. If the result is non-zero, prints
// "Cuda failure: <code>" to stderr and aborts the process. Wrapped in
// do { } while (0) so it behaves as a single statement.
#define CHECK(status)                                                      \
    do {                                                                   \
        auto check_status_ = (status);                                     \
        if (check_status_ != 0) {                                          \
            std::cerr << "Cuda failure: " << check_status_ << std::endl;   \
            abort();                                                       \
        }                                                                  \
    } while (0)

// Build-time switch: when USE_FP16 is defined, createEngine() sets BuilderFlag::kFP16.
//#define USE_FP16  // uncomment to build an FP16 engine; leave commented out for FP32
// Device index passed to cudaSetDevice() in main().
#define DEVICE 0  // GPU id
// Batch size used for the host buffers, the engine build and every doInference() call.
#define BATCH_SIZE 1  // currently, only support BATCH=1

using namespace nvinfer1;

// stuff we know about the network and the input/output blobs
// The input tensor is declared as {3, INPUT_H, INPUT_W}; the output holds OUTPUT_SIZE floats per sample.
static const int INPUT_H = 112;
static const int INPUT_W = 112;
static const int OUTPUT_SIZE = 512;
// Tensor names used both when building the network and when looking up engine bindings.
const char* INPUT_BLOB_NAME = "data";
const char* OUTPUT_BLOB_NAME = "prob";
// Logger instance handed to createInferBuilder() / createInferRuntime().
static Logger gLogger;

// Loads a .wts weight file into a name -> Weights map.
//
// File layout (whitespace separated text):
//   <number of blobs>
//   <name> <size in decimal> <size 32-bit words in hex>      (one entry per blob)
//
// Each blob is stored in a malloc'd buffer of `size` 32-bit words and tagged
// DataType::kFLOAT; the caller owns the buffers and must free() them.
// Malformed input is caught by assert(), as in the rest of this file.
std::map<std::string, Weights> loadWeights(const std::string file) {
    std::cout << "Loading weights: " << file << std::endl;
    std::map<std::string, Weights> weightMap;

    // Open weights file
    std::ifstream input(file);
    assert(input.is_open() && "Unable to load weight file.");

    // Read number of weight blobs
    int32_t count = 0;
    input >> count;
    assert(count > 0 && "Invalid weight map file.");

    while (count--)
    {
        Weights wt{DataType::kFLOAT, nullptr, 0};
        uint32_t size = 0;

        // Read name and element count of blob
        std::string name;
        input >> name >> std::dec >> size;
        wt.type = DataType::kFLOAT;

        // Load blob. Size the buffer by element type: sizeof(val) would be the
        // pointer width and over-allocate on 64-bit targets.
        uint32_t* val = reinterpret_cast<uint32_t*>(malloc(sizeof(uint32_t) * size));
        assert((val != nullptr || size == 0) && "Out of memory while loading weights.");
        for (uint32_t x = 0, y = size; x < y; ++x)
        {
            input >> std::hex >> val[x];
        }
        // A failed extraction means the file ended early or held a non-hex token.
        assert(!input.fail() && "Truncated or malformed weight file.");
        wt.values = val;

        wt.count = size;
        weightMap[name] = wt;
    }

    return weightMap;
}

// Adds a per-channel IScaleLayer computed from the batch-norm statistics
// stored under lname + "_gamma" / "_beta" / "_moving_mean" / "_moving_var":
//   scale = gamma / sqrt(var + eps)
//   shift = beta - mean * gamma / sqrt(var + eps)
//   power = 1
// The three generated buffers are stored in weightMap under lname + ".scale",
// ".shift" and ".power".
IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, std::string lname, float eps) {
    const float* bnGamma = (const float*)weightMap[lname + "_gamma"].values;
    const float* bnBeta = (const float*)weightMap[lname + "_beta"].values;
    const float* bnMean = (const float*)weightMap[lname + "_moving_mean"].values;
    const float* bnVar = (const float*)weightMap[lname + "_moving_var"].values;
    int channels = weightMap[lname + "_moving_var"].count;

    const size_t bytes = sizeof(float) * channels;
    float* scaleVals = reinterpret_cast<float*>(malloc(bytes));
    float* shiftVals = reinterpret_cast<float*>(malloc(bytes));
    float* powerVals = reinterpret_cast<float*>(malloc(bytes));

    // Fill all three per-channel tables in a single pass.
    for (int c = 0; c < channels; ++c) {
        scaleVals[c] = bnGamma[c] / sqrt(bnVar[c] + eps);
        shiftVals[c] = bnBeta[c] - bnMean[c] * bnGamma[c] / sqrt(bnVar[c] + eps);
        powerVals[c] = 1.0;
    }

    Weights scale{DataType::kFLOAT, scaleVals, channels};
    Weights shift{DataType::kFLOAT, shiftVals, channels};
    Weights power{DataType::kFLOAT, powerVals, channels};

    weightMap[lname + ".scale"] = scale;
    weightMap[lname + ".shift"] = shift;
    weightMap[lname + ".power"] = power;

    IScaleLayer* bnLayer = network->addScale(input, ScaleMode::kCHANNEL, shift, scale, power);
    assert(bnLayer);
    return bnLayer;
}

// Builds a per-channel PReLU out of ReLU, scale and element-wise sum layers:
//   out = relu(x) + (-gamma) * relu(-1 * x)
// which equals x for x >= 0 and gamma * x for x < 0. gamma is read from
// weightMap[lname + "_gamma"].
//
// The helper buffers are registered in weightMap (keys lname + ".prelu_*") so
// that they are released by the same free() loop that createEngine() runs
// over weightMap after the build; previously they were never freed.
ILayer* addPRelu(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, std::string lname) {
    float *gamma = (float*)weightMap[lname + "_gamma"].values;
    int len = weightMap[lname + "_gamma"].count;

    float *scval_1 = reinterpret_cast<float*>(malloc(sizeof(float) * len));
    float *scval_2 = reinterpret_cast<float*>(malloc(sizeof(float) * len));
    for (int i = 0; i < len; i++) {
        scval_1[i] = -1.0;
        scval_2[i] = -gamma[i];
    }
    Weights scale_1{ DataType::kFLOAT, scval_1, len };
    Weights scale_2{ DataType::kFLOAT, scval_2, len };

    float *shval = reinterpret_cast<float*>(malloc(sizeof(float) * len));
    for (int i = 0; i < len; i++) {
        shval[i] = 0.0;
    }
    Weights shift{ DataType::kFLOAT, shval, len };

    float *pval = reinterpret_cast<float*>(malloc(sizeof(float) * len));
    for (int i = 0; i < len; i++) {
        pval[i] = 1.0;
    }
    Weights power{ DataType::kFLOAT, pval, len };

    // Hand ownership of the buffers to weightMap so they are not leaked.
    weightMap[lname + ".prelu_neg_one"] = scale_1;
    weightMap[lname + ".prelu_neg_gamma"] = scale_2;
    weightMap[lname + ".prelu_shift"] = shift;
    weightMap[lname + ".prelu_power"] = power;

    // Positive branch: relu(x)
    auto relu1 = network->addActivation(input, ActivationType::kRELU);
    assert(relu1);
    // Negative branch: -gamma * relu(-x)
    IScaleLayer* scale1 = network->addScale(input, ScaleMode::kCHANNEL, shift, scale_1, power);
    assert(scale1);
    auto relu2 = network->addActivation(*scale1->getOutput(0), ActivationType::kRELU);
    assert(relu2);
    IScaleLayer* scale2 = network->addScale(*relu2->getOutput(0), ScaleMode::kCHANNEL, shift, scale_2, power);
    assert(scale2);
    IElementWiseLayer* ew1 = network->addElementWise(*relu1->getOutput(0), *scale2->getOutput(0), ElementWiseOperation::kSUM);
    assert(ew1);
    return ew1;
}

// Adds one residual unit and returns its final element-wise sum layer.
//
// Main path: BN -> 3x3 conv (pad 1) -> BN -> PReLU -> 3x3 conv (stride s, pad 1) -> BN.
// Shortcut:  `input` itself when dim_match is true, otherwise a 1x1 conv
//            (stride s) followed by BN.
// Weights are looked up in weightMap under names prefixed with lname.
ILayer* resUnit(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int num_filters, int s, bool dim_match, std::string lname) {
    const float kBnEps = 2e-5;
    Weights noBias{DataType::kFLOAT, nullptr, 0};

    // Main path
    auto preNorm = addBatchNorm2d(network, weightMap, input, lname + "_bn1", kBnEps);
    IConvolutionLayer* firstConv = network->addConvolutionNd(*preNorm->getOutput(0), num_filters, DimsHW{3, 3}, weightMap[lname + "_conv1_weight"], noBias);
    assert(firstConv);
    firstConv->setPaddingNd(DimsHW{1, 1});

    auto midNorm = addBatchNorm2d(network, weightMap, *firstConv->getOutput(0), lname + "_bn2", kBnEps);
    auto activation = addPRelu(network, weightMap, *midNorm->getOutput(0), lname + "_relu1");

    IConvolutionLayer* secondConv = network->addConvolutionNd(*activation->getOutput(0), num_filters, DimsHW{3, 3}, weightMap[lname + "_conv2_weight"], noBias);
    assert(secondConv);
    secondConv->setStrideNd(DimsHW{s, s});
    secondConv->setPaddingNd(DimsHW{1, 1});
    auto postNorm = addBatchNorm2d(network, weightMap, *secondConv->getOutput(0), lname + "_bn3", kBnEps);

    // Shortcut path: identity unless the shapes have to be adapted.
    ITensor* shortcut = &input;
    if (!dim_match) {
        IConvolutionLayer* projConv = network->addConvolutionNd(input, num_filters, DimsHW{1, 1}, weightMap[lname + "_conv1sc_weight"], noBias);
        assert(projConv);
        projConv->setStrideNd(DimsHW{s, s});
        auto projNorm = addBatchNorm2d(network, weightMap, *projConv->getOutput(0), lname + "_sc", kBnEps);
        shortcut = projNorm->getOutput(0);
    }

    IElementWiseLayer* sum = network->addElementWise(*shortcut, *postNorm->getOutput(0), ElementWiseOperation::kSUM);
    assert(sum);
    return sum;
}

// Create the engine using only the API and not any parser.
//
// Builds the ArcFace ResNet-50 graph from ../arcface-r50.wts:
//   conv0 -> bn0 -> PReLU -> 4 residual stages (3, 4, 14, 3 units with
//   64, 128, 256, 512 filters) -> bn1 -> fully connected (512) -> BN "fc1".
//
// maxBatchSize: passed to IBuilder::setMaxBatchSize().
// builder/config: owned by the caller; not destroyed here.
// dt: data type of the input tensor.
//
// Returns the built engine, or nullptr if the build failed. All host weight
// buffers held in weightMap are freed before returning.
ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt) {
    INetworkDefinition* network = builder->createNetworkV2(0U);
    assert(network);

    // Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
    ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{3, INPUT_H, INPUT_W});
    assert(data);

    std::map<std::string, Weights> weightMap = loadWeights("../arcface-r50.wts");
    Weights emptywts{DataType::kFLOAT, nullptr, 0};

    IConvolutionLayer* conv0 = network->addConvolutionNd(*data, 64, DimsHW{3, 3}, weightMap["conv0_weight"], emptywts);
    assert(conv0);
    conv0->setPaddingNd(DimsHW{1, 1});
    auto bn0 = addBatchNorm2d(network, weightMap, *conv0->getOutput(0), "bn0", 2e-5);
    auto relu0 = addPRelu(network, weightMap, *bn0->getOutput(0), "relu0");

    // Residual stages. The first unit of every stage uses stride 2 with a
    // projection shortcut (dim_match == false); the remaining units use
    // stride 1 with an identity shortcut. Weight names follow
    // "stage<S>_unit<U>", both counted from 1.
    static const int kNumStages = 4;
    static const int kUnitsPerStage[kNumStages] = {3, 4, 14, 3};
    static const int kFiltersPerStage[kNumStages] = {64, 128, 256, 512};

    ITensor* x = relu0->getOutput(0);
    for (int stage = 0; stage < kNumStages; ++stage) {
        for (int unit = 1; unit <= kUnitsPerStage[stage]; ++unit) {
            const bool firstUnit = (unit == 1);
            const std::string lname = "stage" + std::to_string(stage + 1) + "_unit" + std::to_string(unit);
            ILayer* block = resUnit(network, weightMap, *x, kFiltersPerStage[stage], firstUnit ? 2 : 1, !firstUnit, lname);
            x = block->getOutput(0);
        }
    }

    auto bn1 = addBatchNorm2d(network, weightMap, *x, "bn1", 2e-5);
    IFullyConnectedLayer* fc1 = network->addFullyConnected(*bn1->getOutput(0), 512, weightMap["pre_fc1_weight"], weightMap["pre_fc1_bias"]);
    assert(fc1);
    auto bn2 = addBatchNorm2d(network, weightMap, *fc1->getOutput(0), "fc1", 2e-5);

    bn2->getOutput(0)->setName(OUTPUT_BLOB_NAME);
    network->markOutput(*bn2->getOutput(0));

    // Build engine
    builder->setMaxBatchSize(maxBatchSize);
    config->setMaxWorkspaceSize(16 * (1 << 20));  // 16MB
#ifdef USE_FP16
    config->setFlag(BuilderFlag::kFP16);
#endif
    std::cout << "Building engine, please wait for a while..." << std::endl;
    ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    // Only report success when the build actually produced an engine.
    if (engine != nullptr) {
        std::cout << "Build engine successfully!" << std::endl;
    } else {
        std::cerr << "Build engine failed!" << std::endl;
    }

    // Don't need the network any more
    network->destroy();

    // Release host memory
    for (auto& mem : weightMap)
    {
        free((void*) (mem.second.values));
    }

    return engine;
}

// Builds the network through the TensorRT API and serializes the resulting engine.
//
// maxBatchSize: forwarded unchanged to createEngine().
// modelStream:  out-parameter; on return *modelStream holds the serialized
//               engine. The caller is responsible for calling destroy() on it.
void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream) {
    // Create builder and its configuration object
    IBuilder* builder = createInferBuilder(gLogger);
    assert(builder != nullptr);
    IBuilderConfig* config = builder->createBuilderConfig();
    assert(config != nullptr);

    // Create model to populate the network, then set the outputs and create an engine
    ICudaEngine* engine = createEngine(maxBatchSize, builder, config, DataType::kFLOAT);
    assert(engine != nullptr);

    // Serialize the engine
    (*modelStream) = engine->serialize();

    // Close everything down. createEngine() does not destroy the config, so it
    // has to be released here together with the engine and the builder.
    engine->destroy();
    config->destroy();
    builder->destroy();
}

// Runs one synchronous inference pass.
//
// context:   execution context of an engine with exactly two bindings
//            (INPUT_BLOB_NAME and OUTPUT_BLOB_NAME).
// input:     host buffer holding batchSize * 3 * INPUT_H * INPUT_W floats.
// output:    host buffer receiving batchSize * OUTPUT_SIZE floats.
// batchSize: number of samples in this call.
//
// Device buffers and the CUDA stream are created and released on every call.
// Any CUDA or TensorRT failure aborts the process, matching the CHECK macro.
void doInference(IExecutionContext& context, float* input, float* output, int batchSize) {
    const ICudaEngine& engine = context.getEngine();

    // Pointers to input and output device buffers to pass to engine.
    // Engine requires exactly IEngine::getNbBindings() number of buffers.
    assert(engine.getNbBindings() == 2);
    void* buffers[2] = {nullptr, nullptr};

    // In order to bind the buffers, we need to know the names of the input and output tensors.
    const int inputIndex = engine.getBindingIndex(INPUT_BLOB_NAME);
    const int outputIndex = engine.getBindingIndex(OUTPUT_BLOB_NAME);
    // The indices are used to subscript buffers[], so reject anything out of range.
    assert(inputIndex >= 0 && inputIndex < 2);
    assert(outputIndex >= 0 && outputIndex < 2);

    const size_t inputBytes = static_cast<size_t>(batchSize) * 3 * INPUT_H * INPUT_W * sizeof(float);
    const size_t outputBytes = static_cast<size_t>(batchSize) * OUTPUT_SIZE * sizeof(float);

    // Create GPU buffers on device
    CHECK(cudaMalloc(&buffers[inputIndex], inputBytes));
    CHECK(cudaMalloc(&buffers[outputIndex], outputBytes));

    // Create stream
    cudaStream_t stream;
    CHECK(cudaStreamCreate(&stream));

    // DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host
    CHECK(cudaMemcpyAsync(buffers[inputIndex], input, inputBytes, cudaMemcpyHostToDevice, stream));
    if (!context.enqueue(batchSize, buffers, stream, nullptr)) {
        // Without this check a failed enqueue would silently return stale output.
        std::cerr << "TensorRT enqueue failed" << std::endl;
        abort();
    }
    CHECK(cudaMemcpyAsync(output, buffers[outputIndex], outputBytes, cudaMemcpyDeviceToHost, stream));
    // Errors from the asynchronous work above surface here.
    CHECK(cudaStreamSynchronize(stream));

    // Release stream and buffers
    CHECK(cudaStreamDestroy(stream));
    CHECK(cudaFree(buffers[inputIndex]));
    CHECK(cudaFree(buffers[outputIndex]));
}

// Appends the name of every entry in directory p_dir_name to file_names,
// skipping the "." and ".." entries. Only the bare entry name is stored; the
// directory path is not prepended.
//
// Returns 0 on success, or -1 if the directory cannot be opened (file_names
// is left untouched in that case).
int read_files_in_dir(const char *p_dir_name, std::vector<std::string> &file_names) {
    DIR *dir_handle = opendir(p_dir_name);
    if (!dir_handle) {
        return -1;
    }

    for (struct dirent* entry = readdir(dir_handle); entry != nullptr; entry = readdir(dir_handle)) {
        const std::string entry_name(entry->d_name);
        if (entry_name == "." || entry_name == "..") {
            continue;
        }
        file_names.push_back(entry_name);
    }

    closedir(dir_handle);
    return 0;
}

int main(int argc, char** argv) {
    cudaSetDevice(DEVICE);
    // create a model using the API directly and serialize it to a stream
    char *trtModelStream{nullptr};
    size_t size{0};

    if (argc == 2 && std::string(argv[1]) == "-s") {
        IHostMemory* modelStream{nullptr};
        APIToModel(BATCH_SIZE, &modelStream);
        assert(modelStream != nullptr);
        std::ofstream p("arcface-r50.engine", std::ios::binary);
        if (!p) {
            std::cerr << "could not open plan output file" << std::endl;
            return -1;
        }
        p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
        modelStream->destroy();
        return 0;
    } else if (argc == 2 && std::string(argv[1]) == "-d") {
        std::ifstream file("arcface-r50.engine", std::ios::binary);
        if (file.good()) {
            file.seekg(0, file.end);
            size = file.tellg();
            file.seekg(0, file.beg);
            trtModelStream = new char[size];
            assert(trtModelStream);
            file.read(trtModelStream, size);
            file.close();
        }
    } else {
        std::cerr << "arguments not right!" << std::endl;
        std::cerr << "./arcface-r50 -s  // serialize model to plan file" << std::endl;
        std::cerr << "./arcface-r50 -d  // deserialize plan file and run inference" << std::endl;
        return -1;
    }

    // prepare input data ---------------------------
    static float data[BATCH_SIZE * 3 * INPUT_H * INPUT_W];
    //for (int i = 0; i < 3 * INPUT_H * INPUT_W; i++)
    //    data[i] = 1.0;
    static float prob[BATCH_SIZE * OUTPUT_SIZE];
    IRuntime* runtime = createInferRuntime(gLogger);
    assert(runtime != nullptr);
    ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size);
    assert(engine != nullptr);
    IExecutionContext* context = engine->createExecutionContext();
    assert(context != nullptr);
    delete[] trtModelStream;

    cv::Mat img = cv::imread("../joey0.ppm");
    for (int i = 0; i < INPUT_H * INPUT_W; i++) {
        data[i] = ((float)img.at<cv::Vec3b>(i)[2] - 127.5) * 0.0078125;
        data[i + INPUT_H * INPUT_W] = ((float)img.at<cv::Vec3b>(i)[1] - 127.5) * 0.0078125;
        data[i + 2 * INPUT_H * INPUT_W] = ((float)img.at<cv::Vec3b>(i)[0] - 127.5) * 0.0078125;
    }

    // Run inference
    auto start = std::chrono::system_clock::now();
    doInference(*context, data, prob, BATCH_SIZE);
    auto end = std::chrono::system_clock::now();
    std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;

    cv::Mat out(512, 1, CV_32FC1, prob);
    cv::Mat out_norm;
    cv::normalize(out, out_norm);

    img = cv::imread("../joey1.ppm");
    for (int i = 0; i < INPUT_H * INPUT_W; i++) {
        data[i] = ((float)img.at<cv::Vec3b>(i)[2] - 127.5) * 0.0078125;
        data[i + INPUT_H * INPUT_W] = ((float)img.at<cv::Vec3b>(i)[1] - 127.5) * 0.0078125;
        data[i + 2 * INPUT_H * INPUT_W] = ((float)img.at<cv::Vec3b>(i)[0] - 127.5) * 0.0078125;
    }

    // Run inference
    start = std::chrono::system_clock::now();
    doInference(*context, data, prob, BATCH_SIZE);
    end = std::chrono::system_clock::now();
    std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;

    cv::Mat out1(1, 512, CV_32FC1, prob);
    cv::Mat out_norm1;
    cv::normalize(out1, out_norm1);

    cv::Mat res = out_norm1 * out_norm;

    std::cout << "similarity score: " << *(float*)res.data << std::endl;

    // Destroy the engine
    context->destroy();
    engine->destroy();
    runtime->destroy();

    //Print histogram of the output distribution
    //std::cout << "\nOutput:\n\n";
    //for (unsigned int i = 0; i < OUTPUT_SIZE; i++)
    //{
    //    std::cout << p_out_norm[i] << ", ";
    //    if (i % 10 == 0) std::cout << i / 10 << std::endl;
    //}
    //std::cout << std::endl;

    return 0;
}


================================================
FILE: arcface/gen_wts.py
================================================
import struct
import sys
import argparse
import face_model
import cv2
import numpy as np

# Dumps every parameter of the loaded face model to a text .wts file:
#   line 1:      total number of parameter blobs
#   other lines: "<name> <count>  <hex> <hex> ..." with each value written as
#                the hex encoding of a big-endian float32.
parser = argparse.ArgumentParser(description='face model test')
# general
parser.add_argument('--image-size', default='112,112', help='')
parser.add_argument('--model', default='model-r100-ii/model,0', help='path to load model.')
parser.add_argument('--ga-model', default='', help='path to load model.')
parser.add_argument('--gpu', default=0, type=int, help='gpu id')
parser.add_argument('--det', default=0, type=int, help='mtcnn option, 1 means using R+O, 0 means detect from begining')
parser.add_argument('--flip', default=0, type=int, help='whether do lr flip aug')
parser.add_argument('--threshold', default=1.24, type=float, help='ver dist threshold')
args = parser.parse_args()

model = face_model.FaceModel(args)

# Fetch the parameters once instead of on every access.
# NOTE(review): presumably (arg_params, aux_params) as returned by an MXNet
# module's get_params() - confirm against face_model.
params = model.model.get_params()
param_groups = (params[0], params[1])

# NOTE: the output name is fixed even when --model points at another network.
# The with-block guarantees the file is flushed and closed, including on error.
with open('arcface-r100.wts', 'w') as f:
    f.write('{}\n'.format(sum(len(group.keys()) for group in param_groups)))
    for group in param_groups:
        for k, v in group.items():
            vr = v.reshape(-1).asnumpy()
            f.write('{} {} '.format(k, len(vr)))
            for vv in vr:
                f.write(' ')
                f.write(struct.pack('>f', float(vv)).hex())
            f.write('\n')



================================================
FILE: arcface/logging.h
================================================
/*
 * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef TENSORRT_LOGGING_H
#define TENSORRT_LOGGING_H

#include "NvInferRuntimeCommon.h"
#include <cassert>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <sstream>
#include <string>
#include "macros.h"

// TRT_NOEXCEPT expands to `noexcept` when NV_TENSORRT_MAJOR is 8 or newer and
// to nothing otherwise.
// NOTE(review): presumably applied to overrides of TensorRT interface methods
// so one source builds against both API generations - confirm at the use sites.
#if NV_TENSORRT_MAJOR >= 8
#define TRT_NOEXCEPT noexcept
#else
#define TRT_NOEXCEPT
#endif

// Shorthand for the TensorRT logger severity enum used throughout this header.
using Severity = nvinfer1::ILogger::Severity;

//!
//! \class LogStreamConsumerBuffer
//! \brief String buffer that, on sync or destruction, writes its contents to a target stream
//!  preceded by a "[MM/DD/YYYY-HH:MM:SS] " timestamp and a fixed prefix.
//!  Nothing is written while logging is disabled.
//!
class LogStreamConsumerBuffer : public std::stringbuf
{
public:
    //! \param stream    stream that receives the formatted output
    //! \param prefix    text inserted between the timestamp and the message
    //! \param shouldLog whether output is emitted at all
    LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix, bool shouldLog)
        : mOutput(stream)
        , mPrefix(prefix)
        , mShouldLog(shouldLog)
    {
    }

    //! Copies the target stream, prefix and logging flag from \p other; the buffered text is not transferred.
    //! All three members are initialised: previously only mOutput was, leaving mPrefix empty
    //! and mShouldLog indeterminate.
    LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other)
        : mOutput(other.mOutput)
        , mPrefix(other.mPrefix)
        , mShouldLog(other.mShouldLog)
    {
    }

    ~LogStreamConsumerBuffer()
    {
        // std::streambuf::pbase() gives a pointer to the beginning of the buffered part of the output sequence
        // std::streambuf::pptr() gives a pointer to the current position of the output sequence
        // if the pointer to the beginning is not equal to the pointer to the current position,
        // call putOutput() to log the output to the stream
        if (pbase() != pptr())
        {
            putOutput();
        }
    }

    // synchronizes the stream buffer and returns 0 on success
    // synchronizing the stream buffer consists of inserting the buffer contents into the stream,
    // resetting the buffer and flushing the stream
    virtual int sync()
    {
        putOutput();
        return 0;
    }

    //! Writes timestamp, prefix and buffered text to the target stream, then clears the buffer
    //! and flushes the stream. Does nothing (and keeps the buffered text) when logging is disabled.
    void putOutput()
    {
        if (mShouldLog)
        {
            // prepend timestamp; it goes to the same stream as the message so that
            // output directed at std::cerr is not split across std::cout and std::cerr
            std::time_t timestamp = std::time(nullptr);
            tm* tm_local = std::localtime(&timestamp);
            mOutput << "[";
            mOutput << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon << "/";
            mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/";
            mOutput << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year << "-";
            mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":";
            mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":";
            mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] ";
            // std::stringbuf::str() gets the string contents of the buffer
            // insert the buffer contents pre-appended by the appropriate prefix into the stream
            mOutput << mPrefix << str();
            // set the buffer to empty
            str("");
            // flush the stream
            mOutput.flush();
        }
    }

    //! Enables or disables emission of buffered output.
    void setShouldLog(bool shouldLog)
    {
        mShouldLog = shouldLog;
    }

private:
    std::ostream& mOutput; // destination of the formatted log lines
    std::string mPrefix;   // inserted after the timestamp
    bool mShouldLog;       // when false, putOutput() is a no-op
};

//!
//! \class LogStreamConsumerBase
//! \brief Holder for the LogStreamConsumerBuffer used by LogStreamConsumer.
//!  Deriving from this class first makes the buffer exist before the std::ostream
//!  base of LogStreamConsumer is constructed with its address.
//!
class LogStreamConsumerBase
{
public:
    //! Forwards all three arguments to the LogStreamConsumerBuffer constructor.
    LogStreamConsumerBase(std::ostream& stream, const std::string& prefix, bool shouldLog)
        : mBuffer(stream, prefix, shouldLog) {}

protected:
    //! Buffer handed to std::ostream by the derived class.
    LogStreamConsumerBuffer mBuffer;
};

//!
//! \class LogStreamConsumer
//! \brief Output stream for one severity level, so messages can be logged with C++ stream syntax.
//!  The base-class order (LogStreamConsumerBase first, std::ostream second) is significant:
//!  the buffer member living in LogStreamConsumerBase has to be constructed before its address
//!  is handed to std::ostream. Do not reorder the parent classes.
//!
class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream
{
public:
    //! \brief Creates a stream for messages of level \p severity.
    //!  Messages are emitted only when severity <= reportableSeverity.
    LogStreamConsumer(Severity reportableSeverity, Severity severity)
        : LogStreamConsumerBase(severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity)
        , std::ostream(&mBuffer) // attach the freshly built buffer to this stream
        , mShouldLog(severity <= reportableSeverity)
        , mSeverity(severity)
    {
    }

    //! \brief Builds a new consumer with the same severity and logging flag as \p other.
    LogStreamConsumer(LogStreamConsumer&& other)
        : LogStreamConsumerBase(severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog)
        , std::ostream(&mBuffer) // attach the freshly built buffer to this stream
        , mShouldLog(other.mShouldLog)
        , mSeverity(other.mSeverity)
    {
    }

    //! \brief Re-evaluates whether this stream logs, given a new reportable severity.
    void setReportableSeverity(Severity reportableSeverity)
    {
        const bool enabled = (mSeverity <= reportableSeverity);
        mShouldLog = enabled;
        mBuffer.setShouldLog(enabled);
    }

private:
    //! Severities comparing >= kINFO go to std::cout, all others to std::cerr.
    static std::ostream& severityOstream(Severity severity)
    {
        if (severity >= Severity::kINFO)
        {
            return std::cout;
        }
        return std::cerr;
    }

    //! Returns the bracketed one-letter tag for \p severity; asserts on an unknown value.
    static std::string severityPrefix(Severity severity)
    {
        const char* tag = "";
        switch (severity)
        {
        case Severity::kINTERNAL_ERROR: tag = "[F] "; break;
        case Severity::kERROR: tag = "[E] "; break;
        case Severity::kWARNING: tag = "[W] "; break;
        case Severity::kINFO: tag = "[I] "; break;
        case Severity::kVERBOSE: tag = "[V] "; break;
        default: assert(0); break;
        }
        return tag;
    }

    bool mShouldLog;
    Severity mSeverity;
};

//! \class Logger
//!
//! \brief Class which manages logging of TensorRT tools and samples
//!
//! \details This class provides a common interface for TensorRT tools and samples to log information to the console,
//! and supports logging two types of messages:
//!
//! - Debugging messages with an associated severity (info, warning, error, or internal error/fatal)
//! - Test pass/fail messages
//!
//! The advantage of having all samples use this class for logging as opposed to emitting directly to stdout/stderr is
//! that the logic for controlling the verbosity and formatting of sample output is centralized in one location.
//!
//! In the future, this class could be extended to support dumping test results to a file in some standard format
//! (for example, JUnit XML), and providing additional metadata (e.g. timing the duration of a test run).
//!
//! TODO: For backwards compatibility with existing samples, this class inherits directly from the nvinfer1::ILogger
//! interface, which is problematic since there isn't a clean separation between messages coming from the TensorRT
//! library and messages coming from the sample.
//!
//! In the future (once all samples are updated to use Logger::getTRTLogger() to access the ILogger) we can refactor the
//! class to eliminate the inheritance and instead make the nvinfer1::ILogger implementation a member of the Logger
//! object.

class Logger : public nvinfer1::ILogger
{
public:
    //!
    //! \brief Constructs a logger with the given reportable severity
    //!
    //! \param severity Initial reportable severity (defaults to Severity::kWARNING)
    //!
    Logger(Severity severity = Severity::kWARNING)
        : mReportableSeverity(severity)
    {
    }

    //!
    //! \enum TestResult
    //! \brief Represents the state of a given test
    //!
    enum class TestResult
    {
        kRUNNING, //!< The test is running
        kPASSED,  //!< The test passed
        kFAILED,  //!< The test failed
        kWAIVED   //!< The test was waived
    };

    //!
    //! \brief Forward-compatible method for retrieving the nvinfer::ILogger associated with this Logger
    //! \return The nvinfer1::ILogger associated with this Logger
    //!
    //! TODO Once all samples are updated to use this method to register the logger with TensorRT,
    //! we can eliminate the inheritance of Logger from ILogger
    //!
    nvinfer1::ILogger& getTRTLogger()
    {
        return *this;
    }

    //!
    //! \brief Implementation of the nvinfer1::ILogger::log() virtual method
    //!
    //! Note samples should not be calling this function directly; it will eventually go away once we eliminate the
    //! inheritance from nvinfer1::ILogger
    //!
    //! \param severity Severity of the message
    //! \param msg The message text; a null pointer is logged as an empty message
    //!
    void log(Severity severity, const char* msg) TRT_NOEXCEPT override
    {
        // Constructing std::string from a null pointer is undefined behaviour, so substitute an empty string.
        LogStreamConsumer(mReportableSeverity, severity)
            << "[TRT] " << std::string(msg != nullptr ? msg : "") << std::endl;
    }

    //!
    //! \brief Method for controlling the verbosity of logging output
    //!
    //! \param severity The logger will only emit messages that have severity of this level or higher.
    //!
    void setReportableSeverity(Severity severity)
    {
        mReportableSeverity = severity;
    }

    //!
    //! \brief Opaque handle that holds logging information for a particular test
    //!
    //! This object is an opaque handle to information used by the Logger to print test results.
    //! The sample must call Logger::defineTest() in order to obtain a TestAtom that can be used
    //! with Logger::reportTest{Start,End}().
    //!
    class TestAtom
    {
    public:
        TestAtom(TestAtom&&) = default;

    private:
        friend class Logger;

        //! Only Logger (a friend) can create a TestAtom; see Logger::defineTest().
        TestAtom(bool started, const std::string& name, const std::string& cmdline)
            : mStarted(started)
            , mName(name)
            , mCmdline(cmdline)
        {
        }

        bool mStarted;        //!< Set to true by Logger::reportTestStart()
        std::string mName;    //!< Test name printed in result lines
        std::string mCmdline; //!< Command line printed in result lines
    };

    //!
    //! \brief Define a test for logging
    //!
    //! \param[in] name The name of the test.  This should be a string starting with
    //!                  "TensorRT" and containing dot-separated strings containing
    //!                  the characters [A-Za-z0-9_].
    //!                  For example, "TensorRT.sample_googlenet"
    //! \param[in] cmdline The command line used to reproduce the test
    //!
    //! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
    //!
    static TestAtom defineTest(const std::string& name, const std::string& cmdline)
    {
        return TestAtom(false, name, cmdline);
    }

    //!
    //! \brief A convenience overloaded version of defineTest() that accepts an array of command-line arguments
    //!        as input
    //!
    //! \param[in] name The name of the test
    //! \param[in] argc The number of command-line arguments
    //! \param[in] argv The array of command-line arguments (given as C strings)
    //!
    //! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
    //!
    static TestAtom defineTest(const std::string& name, int argc, char const* const* argv)
    {
        auto cmdline = genCmdlineString(argc, argv);
        return defineTest(name, cmdline);
    }

    //!
    //! \brief Report that a test has started.
    //!
    //! \pre reportTestStart() has not been called yet for the given testAtom
    //!
    //! \param[in] testAtom The handle to the test that has started
    //!
    static void reportTestStart(TestAtom& testAtom)
    {
        // Check the precondition before producing any output, so a violated precondition
        // does not first emit a misleading RUNNING line.
        assert(!testAtom.mStarted);
        reportTestResult(testAtom, TestResult::kRUNNING);
        testAtom.mStarted = true;
    }

    //!
    //! \brief Report that a test has ended.
    //!
    //! \pre reportTestStart() has been called for the given testAtom
    //!
    //! \param[in] testAtom The handle to the test that has ended
    //! \param[in] result The result of the test. Should be one of TestResult::kPASSED,
    //!                   TestResult::kFAILED, TestResult::kWAIVED
    //!
    static void reportTestEnd(const TestAtom& testAtom, TestResult result)
    {
        assert(result != TestResult::kRUNNING);
        assert(testAtom.mStarted);
        reportTestResult(testAtom, result);
    }

    //!
    //! \brief Report the test as passed
    //! \return EXIT_SUCCESS
    //!
    static int reportPass(const TestAtom& testAtom)
    {
        reportTestEnd(testAtom, TestResult::kPASSED);
        return EXIT_SUCCESS;
    }

    //!
    //! \brief Report the test as failed
    //! \return EXIT_FAILURE
    //!
    static int reportFail(const TestAtom& testAtom)
    {
        reportTestEnd(testAtom, TestResult::kFAILED);
        return EXIT_FAILURE;
    }

    //!
    //! \brief Report the test as waived
    //! \return EXIT_SUCCESS
    //!
    static int reportWaive(const TestAtom& testAtom)
    {
        reportTestEnd(testAtom, TestResult::kWAIVED);
        return EXIT_SUCCESS;
    }

    //!
    //! \brief Report the test as passed or failed depending on \p pass
    //! \return EXIT_SUCCESS when \p pass is true, EXIT_FAILURE otherwise
    //!
    static int reportTest(const TestAtom& testAtom, bool pass)
    {
        return pass ? reportPass(testAtom) : reportFail(testAtom);
    }

    //!
    //! \brief Returns the currently configured reportable severity
    //!
    Severity getReportableSeverity() const
    {
        return mReportableSeverity;
    }

private:
    //!
    //! \brief returns an appropriate string for prefixing a log message with the given severity
    //!
    static const char* severityPrefix(Severity severity)
    {
        switch (severity)
        {
        case Severity::kINTERNAL_ERROR: return "[F] ";
        case Severity::kERROR: return "[E] ";
        case Severity::kWARNING: return "[W] ";
        case Severity::kINFO: return "[I] ";
        case Severity::kVERBOSE: return "[V] ";
        default: assert(0); return "";
        }
    }

    //!
    //! \brief returns an appropriate string for prefixing a test result message with the given result
    //!
    static const char* testResultString(TestResult result)
    {
        switch (result)
        {
        case TestResult::kRUNNING: return "RUNNING";
        case TestResult::kPASSED: return "PASSED";
        case TestResult::kFAILED: return "FAILED";
        case TestResult::kWAIVED: return "WAIVED";
        default: assert(0); return "";
        }
    }

    //!
    //! \brief returns an appropriate output stream (cout or cerr) to use with the given severity
    //!
    static std::ostream& severityOstream(Severity severity)
    {
        return severity >= Severity::kINFO ? std::cout : std::cerr;
    }

    //!
    //! \brief method that implements logging test results
    //!
    //! Writes a line of the form "&&&& <RESULT> <name> # <cmdline>" to the stream selected for Severity::kINFO.
    //!
    static void reportTestResult(const TestAtom& testAtom, TestResult result)
    {
        severityOstream(Severity::kINFO) << "&&&& " << testResultString(result) << " " << testAtom.mName << " # "
                                         << testAtom.mCmdline << std::endl;
    }

    //!
    //! \brief generate a command line string from the given (argc, argv) values
    //!
    //! Arguments are joined with single spaces. A null \p argv yields an empty string and null
    //! entries contribute no text, because streaming a null C string is not valid.
    //!
    static std::string genCmdlineString(int argc, char const* const* argv)
    {
        std::stringstream ss;
        if (argv == nullptr)
        {
            return ss.str();
        }
        for (int i = 0; i < argc; i++)
        {
            if (i > 0)
                ss << " ";
            if (argv[i] != nullptr)
                ss << argv[i];
        }
        return ss.str();
    }

    Severity mReportableSeverity; //!< Threshold passed to LogStreamConsumer when logging
};

namespace
{

//!
//! \brief Builds a LogStreamConsumer for kVERBOSE messages, using the logger's current reportable severity
//!
//! Example usage:
//!
//!     LOG_VERBOSE(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
{
    const Severity threshold = logger.getReportableSeverity();
    return LogStreamConsumer(threshold, Severity::kVERBOSE);
}

//!
//! \brief Builds a LogStreamConsumer for kINFO messages, using the logger's current reportable severity
//!
//! Example usage:
//!
//!     LOG_INFO(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_INFO(const Logger& logger)
{
    const Severity threshold = logger.getReportableSeverity();
    return LogStreamConsumer(threshold, Severity::kINFO);
}

//!
//! \brief Builds a LogStreamConsumer for kWARNING messages, using the logger's current reportable severity
//!
//! Example usage:
//!
//!     LOG_WARN(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_WARN(const Logger& logger)
{
    const Severity threshold = logger.getReportableSeverity();
    return LogStreamConsumer(threshold, Severity::kWARNING);
}

//!
//! \brief Builds a LogStreamConsumer for kERROR messages, using the logger's current reportable severity
//!
//! Example usage:
//!
//!     LOG_ERROR(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_ERROR(const Logger& logger)
{
    const Severity threshold = logger.getReportableSeverity();
    return LogStreamConsumer(threshold, Severity::kERROR);
}

//!
//! \brief Builds a LogStreamConsumer for kINTERNAL_ERROR ("fatal") messages, using the logger's current
//!        reportable severity
//!
//! Example usage:
//!
//!     LOG_FATAL(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_FATAL(const Logger& logger)
{
    const Severity threshold = logger.getReportableSeverity();
    return LogStreamConsumer(threshold, Severity::kINTERNAL_ERROR);
}

} // anonymous namespace

#endif // TENSORRT_LOGGING_H


================================================
FILE: arcface/macros.h
================================================
// Compatibility macros that let the same plugin sources declare their overrides
// with or without `noexcept` / `const`, depending on the TensorRT major version.
#ifndef __MACROS_H
#define __MACROS_H

// NOTE(review): NV_TENSORRT_MAJOR is not defined in this header. If it is not
// defined before this point the preprocessor evaluates it as 0 and the #else
// branch is taken. Presumably it is provided by the TensorRT headers (e.g.
// NvInfer.h) - confirm that those are always included before this file.
#if NV_TENSORRT_MAJOR >= 8
// Expands to `noexcept`.
#define TRT_NOEXCEPT noexcept
// Expands to `const`; used in the `outputs` parameter type of enqueue().
#define TRT_CONST_ENQUEUE const
#else
// Older versions: both macros expand to nothing.
#define TRT_NOEXCEPT
#define TRT_CONST_ENQUEUE
#endif

#endif  // __MACROS_H

================================================
FILE: arcface/prelu.cu
================================================
#include <cassert>
#include <cmath>
#include <cstring>
#include <iostream>
#include <stdio.h>
#include "prelu.h"

namespace nvinfer1
{
    // Builds a plugin from the per-channel slopes applied to negative inputs.
    // input_size_ starts at 0 and is filled in by getOutputDimensions(); the
    // namespace starts as an empty string so that clone() and
    // getPluginNamespace() never read an indeterminate pointer.
    PReluPlugin::PReluPlugin(const std::vector<float>& gamma)
        : input_size_(0), gamma_(gamma), mPluginNamespace("")
    {
    }

    PReluPlugin::~PReluPlugin()
    {
    }

    // Creates the plugin at runtime from a byte stream produced by serialize().
    // Layout: [int input_size_][float gamma_[0] ... gamma_[n-1]].
    // A null or too-short blob leaves the plugin with input_size_ == 0 and no slopes.
    PReluPlugin::PReluPlugin(const void* data, size_t length)
        : input_size_(0), mPluginNamespace("")
    {
        assert(data != nullptr && length >= sizeof(int));
        if (data == nullptr || length < sizeof(int)) {
            // Without this guard `length - sizeof(int)` would wrap around.
            return;
        }
        const char* p = static_cast<const char*>(data);
        std::memcpy(&input_size_, p, sizeof(int));
        p += sizeof(int);
        const size_t count = (length - sizeof(int)) / sizeof(float);
        const float* w = reinterpret_cast<const float*>(p);
        gamma_.assign(w, w + count);
    }

    // Writes input_size_ followed by all slopes into `buffer`, which must hold
    // at least getSerializationSize() bytes.
    void PReluPlugin::serialize(void* buffer) const TRT_NOEXCEPT
    {
        char* p = static_cast<char*>(buffer);
        std::memcpy(p, &input_size_, sizeof(input_size_));
        p += sizeof(input_size_);
        if (!gamma_.empty()) {
            std::memcpy(p, gamma_.data(), gamma_.size() * sizeof(float));
        }
    }

    // Number of bytes written by serialize().
    size_t PReluPlugin::getSerializationSize() const TRT_NOEXCEPT
    {
        return sizeof(input_size_) + gamma_.size() * sizeof(float);
    }

    // Nothing to set up; always reports success (0).
    int PReluPlugin::initialize() TRT_NOEXCEPT
    {
        return 0;
    }

    // The output has the same three dimensions as the single input.
    // Side effect: records the per-sample element count (d[0]*d[1]*d[2]) in input_size_.
    Dims PReluPlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims) TRT_NOEXCEPT
    {
        assert(nbInputDims == 1);
        assert(index == 0);
        input_size_ = inputs[0].d[0] * inputs[0].d[1] * inputs[0].d[2];
        // Output dimensions
        return Dims3(inputs[0].d[0], inputs[0].d[1], inputs[0].d[2]);
    }

    // Set plugin namespace. Only the pointer is stored (no copy is made), so the
    // string must stay valid for as long as this plugin may return it.
    void PReluPlugin::setPluginNamespace(const char* pluginNamespace) TRT_NOEXCEPT
    {
        mPluginNamespace = pluginNamespace;
    }

    const char* PReluPlugin::getPluginNamespace() const TRT_NOEXCEPT
    {
        return mPluginNamespace;
    }

    // Return the DataType of the plugin output at the requested index (always kFLOAT)
    DataType PReluPlugin::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const TRT_NOEXCEPT
    {
        return DataType::kFLOAT;
    }

    // Return true if output tensor is broadcast across a batch.
    bool PReluPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const TRT_NOEXCEPT
    {
        return false;
    }

    // Return true if plugin can use input that is broadcast across batch without replication.
    bool PReluPlugin::canBroadcastInputAcrossBatch(int inputIndex) const TRT_NOEXCEPT
    {
        return false;
    }

    // No configuration is derived from the tensor descriptions.
    void PReluPlugin::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) TRT_NOEXCEPT
    {
    }

    // Attach the plugin object to an execution context and grant the plugin the access to some context resource.
    // This plugin uses none of the offered resources.
    void PReluPlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) TRT_NOEXCEPT
    {
    }

    // Detach the plugin object from its execution context.
    void PReluPlugin::detachFromContext() TRT_NOEXCEPT {}

    const char* PReluPlugin::getPluginType() const TRT_NOEXCEPT
    {
        return "PRelu_TRT";
    }

    const char* PReluPlugin::getPluginVersion() const TRT_NOEXCEPT
    {
        return "1";
    }

    void PReluPlugin::destroy() TRT_NOEXCEPT
    {
        delete this;
    }

    // Clone the plugin: copies the slopes, the recorded input size and the namespace pointer.
    IPluginV2IOExt* PReluPlugin::clone() const TRT_NOEXCEPT
    {
        PReluPlugin *p = new PReluPlugin(gamma_);
        p->input_size_ = input_size_;
        p->setPluginNamespace(mPluginNamespace);
        return p;
    }

    // One thread per element: non-negative values are copied through, negative
    // values are multiplied by gamma[c], where c = (idx % input_size) / fm_size.
    //   num_elem   - total number of elements to process
    //   input_size - value used for the modulo (the host passes elements per sample)
    //   fm_size    - divisor mapping an offset to a gamma index
    //   gamma      - slopes, readable from the device
    __global__ void prelu_kernel(const float *input, float *output, int num_elem, int input_size, int fm_size, const float* gamma) {

        int idx = threadIdx.x + blockDim.x * blockIdx.x;
        if (idx >= num_elem) return;

        if (input[idx] >= 0.0f) {
            output[idx] = input[idx];
            return;
        }
        int c = (idx % input_size) / fm_size;
        output[idx] = input[idx] * gamma[c];
    }

    // Uploads the slopes to the device and launches prelu_kernel on `stream`
    // over input_size_ * batchSize elements.
    // The CUDA calls are made outside of assert() so they still run when the
    // file is compiled with NDEBUG; failures are reported on std::cerr.
    void PReluPlugin::forwardGpu(const float *const * inputs, float* output, cudaStream_t stream, int batchSize) {
        const int num_elem = input_size_ * batchSize;
        if (num_elem <= 0) {
            // Nothing to process; a zero-sized grid is not a valid launch configuration.
            return;
        }
        if (gamma_.empty()) {
            // Without slopes the per-channel size below would divide by zero.
            std::cerr << "PReluPlugin::forwardGpu: no gamma values available" << std::endl;
            return;
        }

        const int block_size = thread_count_;
        const int grid_size = (num_elem + block_size - 1) / block_size;
        const size_t gamma_bytes = sizeof(float) * gamma_.size();
        const int fm_size = static_cast<int>(input_size_ / gamma_.size());

        void* dev_gamma = nullptr;
        cudaError_t err = cudaMalloc(&dev_gamma, gamma_bytes);
        if (err == cudaSuccess) {
            err = cudaMemcpy(dev_gamma, gamma_.data(), gamma_bytes, cudaMemcpyHostToDevice);
        }
        if (err == cudaSuccess) {
            // Launch on the caller's stream so the kernel is ordered with the
            // rest of the work enqueued on it.
            prelu_kernel<<<grid_size, block_size, 0, stream>>>(
                inputs[0], output, num_elem, input_size_, fm_size, static_cast<const float*>(dev_gamma));
            err = cudaGetLastError();
        }
        // Freeing a null pointer is a no-op, so this is safe on every path.
        const cudaError_t free_err = cudaFree(dev_gamma);
        if (err == cudaSuccess) {
            err = free_err;
        }
        if (err != cudaSuccess) {
            std::cerr << "PReluPlugin::forwardGpu: CUDA error: " << cudaGetErrorString(err) << std::endl;
        }
        assert(err == cudaSuccess);
    }

    // Runs the activation for `batchSize` samples: inputs[0] -> outputs[0]. Always returns 0.
    int PReluPlugin::enqueue(int batchSize, const void*const * inputs, void* TRT_CONST_ENQUEUE* outputs, void* workspace, cudaStream_t stream) TRT_NOEXCEPT
    {
        //assert(batchSize == 1);
        //GPU
        //CUDA_CHECK(cudaStreamSynchronize(stream));
        forwardGpu((const float *const *)inputs, (float*)outputs[0], stream, batchSize);
        return 0;
    }

    PluginFieldCollection PReluPluginCreator::mFC{};
    std::vector<PluginField> PReluPluginCreator::mPluginAttributes;

    // Publishes the single "gamma" attribute through the shared field collection.
    PReluPluginCreator::PReluPluginCreator()
    {
        // mPluginAttributes is static: clear it first so that constructing more
        // than one creator does not accumulate duplicate "gamma" entries.
        mPluginAttributes.clear();
        mPluginAttributes.emplace_back(PluginField("gamma", nullptr, PluginFieldType::kFLOAT32, 1));

        mFC.nbFields = mPluginAttributes.size();
        mFC.fields = mPluginAttributes.data();
    }

    const char* PReluPluginCreator::getPluginName() const TRT_NOEXCEPT
    {
        return "PRelu_TRT";
    }

    const char* PReluPluginCreator::getPluginVersion() const TRT_NOEXCEPT
    {
        return "1";
    }

    const PluginFieldCollection* PReluPluginCreator::getFieldNames() TRT_NOEXCEPT
    {
        return &mFC;
    }

    // Creates a plugin from the "gamma" field(s) of `fc`; values of every field
    // named "gamma" are appended in order. Fields without data (such as the
    // placeholder entry published by getFieldNames()) are skipped instead of
    // being dereferenced.
    IPluginV2IOExt* PReluPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) TRT_NOEXCEPT
    {
        std::vector<float> gamma;
        if (fc != nullptr && fc->fields != nullptr) {
            for (int i = 0; i < fc->nbFields; ++i) {
                const PluginField& field = fc->fields[i];
                if (field.name == nullptr || std::strcmp(field.name, "gamma") != 0) {
                    continue;
                }
                assert(field.type == PluginFieldType::kFLOAT32);
                if (field.data == nullptr || field.length <= 0) {
                    continue;
                }
                const float* w = static_cast<const float*>(field.data);
                gamma.insert(gamma.end(), w, w + field.length);
            }
        }

        PReluPlugin* obj = new PReluPlugin(gamma);
        obj->setPluginNamespace(mNamespace.c_str());
        return obj;
    }

    IPluginV2IOExt* PReluPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) TRT_NOEXCEPT
    {
        // This object will be deleted when the network is destroyed, which will
        // call PReluPlugin::destroy()
        PReluPlugin* obj = new PReluPlugin(serialData, serialLength);
        obj->setPluginNamespace(mNamespace.c_str());
        return obj;
    }

}



================================================
FILE: arcface/prelu.h
================================================
#ifndef _PRELU_PLUGIN_H
#define _PRELU_PLUGIN_H

#include <string>
#include <vector>
#include "NvInfer.h"
#include "macros.h"

namespace nvinfer1
{
    // PReLU activation plugin: negative inputs are scaled by a per-channel slope
    // (gamma), non-negative inputs pass through. Member functions without an
    // inline body are defined in prelu.cu.
    class PReluPlugin: public IPluginV2IOExt
    {
        public:
            // Build from the per-channel slopes.
            PReluPlugin(const std::vector<float>& gamma);
            // Rebuild from a blob written by serialize().
            PReluPlugin(const void* data, size_t length);

            ~PReluPlugin();

            // The plugin produces exactly one output tensor.
            int getNbOutputs() const TRT_NOEXCEPT override
            {
                return 1;
            }

            Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) TRT_NOEXCEPT override;

            int initialize() TRT_NOEXCEPT override;

            // Nothing to release.
            virtual void terminate() TRT_NOEXCEPT override {}

            // No scratch workspace is requested.
            virtual size_t getWorkspaceSize(int maxBatchSize) const TRT_NOEXCEPT override { return 0;}

            virtual int enqueue(int batchSize, const void*const * inputs, void*TRT_CONST_ENQUEUE* outputs, void* workspace, cudaStream_t stream) TRT_NOEXCEPT override;

            virtual size_t getSerializationSize() const TRT_NOEXCEPT override;

            virtual void serialize(void* buffer) const TRT_NOEXCEPT override;

            // Only linear-format FP32 tensors are accepted at any position.
            bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const TRT_NOEXCEPT override {
                return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT;
            }

            const char* getPluginType() const TRT_NOEXCEPT override;

            const char* getPluginVersion() const TRT_NOEXCEPT override;

            void destroy() TRT_NOEXCEPT override;

            IPluginV2IOExt* clone() const TRT_NOEXCEPT override;

            void setPluginNamespace(const char* pluginNamespace) TRT_NOEXCEPT override;

            const char* getPluginNamespace() const TRT_NOEXCEPT override;

            DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const TRT_NOEXCEPT override;

            bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const TRT_NOEXCEPT override;

            bool canBroadcastInputAcrossBatch(int inputIndex) const TRT_NOEXCEPT override;

            void attachToContext(
                    cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) TRT_NOEXCEPT override;

            void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) TRT_NOEXCEPT override;

            void detachFromContext() TRT_NOEXCEPT override;

            // Element count of one input sample. Defaults to 0 so the value is
            // never indeterminate when it is serialized or cloned.
            int input_size_ = 0;
        private:
            void forwardGpu(const float *const * inputs, float* output, cudaStream_t stream, int batchSize = 1);
            // Threads per block used for the kernel launch.
            int thread_count_ = 256;
            // Per-channel slopes applied to negative inputs.
            std::vector<float> gamma_;
            // Namespace pointer handed to setPluginNamespace(); not owned.
            // Defaults to an empty string so clone()/getPluginNamespace() never
            // read an uninitialized pointer.
            const char* mPluginNamespace = "";
    };

    // Factory for PReluPlugin: creates plugins from a field collection or from
    // serialized data. Member functions without an inline body are defined in prelu.cu.
    class PReluPluginCreator : public IPluginCreator
    {
        public:
            PReluPluginCreator();

            ~PReluPluginCreator() override = default;

            const char* getPluginName() const TRT_NOEXCEPT override;

            const char* getPluginVersion() const TRT_NOEXCEPT override;

            const PluginFieldCollection* getFieldNames() TRT_NOEXCEPT override;

            IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) TRT_NOEXCEPT override;

            IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) TRT_NOEXCEPT override;

            // Stores a copy of the namespace string.
            void setPluginNamespace(const char* libNamespace) TRT_NOEXCEPT override
            {
                mNamespace.assign(libNamespace);
            }

            // Returns the stored namespace as a C string owned by this creator.
            const char* getPluginNamespace() const TRT_NOEXCEPT override
            {
                const std::string& ns = mNamespace;
                return ns.c_str();
            }

        private:
            // Namespace copied in by setPluginNamespace().
            std::string mNamespace;
            // Field collection shared by all creator instances.
            static PluginFieldCollection mFC;
            // Backing storage for the entries referenced by mFC.
            static std::vector<PluginField> mPluginAttributes;
    };
};
#endif 


================================================
FILE: centernet/README.md
================================================
# CenterNet

This is the trt implementation of detection model [ctdet_coco_dla_2x](https://drive.google.com/open?id=1pl_-ael8wE

Download .txt

gitextract_0y61g4fh/

├── .clang-format
├── .cmake-format.yaml
├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   └── tensorrtx-issue-template.md
│   ├── stale.yml
│   └── workflows/
│       └── pre-commit.yml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── alexnet/
│   ├── CMakeLists.txt
│   ├── FindTensorRT.cmake
│   ├── README.md
│   ├── alexnet.cc
│   ├── alexnet.py
│   ├── gen_wts.py
│   ├── logging.h
│   ├── macros.h
│   └── utils.h
├── arcface/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── arcface-mobilefacenet.cpp
│   ├── arcface-r100.cpp
│   ├── arcface-r50.cpp
│   ├── gen_wts.py
│   ├── logging.h
│   ├── macros.h
│   ├── prelu.cu
│   └── prelu.h
├── assets/
│   └── 6.pgm
├── centernet/
│   ├── README.md
│   ├── centernet.py
│   ├── dcnv2Plugin/
│   │   ├── CMakeLists.txt
│   │   ├── dcn_v2_im2col_cuda.cu
│   │   ├── dcn_v2_im2col_cuda.h
│   │   ├── dcnv2Plugin.cpp
│   │   └── dcnv2Plugin.h
│   └── sample/
│       ├── common.py
│       └── test.py
├── contributing.md
├── convnextv2/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── config.yaml
│   ├── gen_wts.py
│   ├── inference.py
│   └── src/
│       ├── LayerNormPlugin.cu
│       ├── LayerNormPlugin.h
│       ├── convnextv2.cpp
│       ├── inference_cpp.cpp
│       └── logging.h
├── crnn/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── crnn.cpp
│   ├── genwts.py
│   └── logging.h
├── csrnet/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── config.h
│   ├── csrnet.cpp
│   ├── gen_wts.py
│   ├── logging.h
│   └── macros.h
├── dbnet/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── clipper/
│   │   ├── CMakeLists.txt
│   │   ├── clipper.cpp
│   │   └── clipper.hpp
│   ├── common.hpp
│   ├── dbnet.cpp
│   ├── logging.h
│   └── utils.h
├── densenet/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── densenet121.cpp
│   ├── densenet121.py
│   └── logging.h
├── detr/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── backbone.hpp
│   ├── calibrator.hpp
│   ├── common.hpp
│   ├── detr.cpp
│   ├── gen_wts.py
│   ├── logging.h
│   └── macros.h
├── docker/
│   ├── README.md
│   ├── tensorrtx-docker-compose.yml
│   └── x86_64.dockerfile
├── efficient_ad/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── efficientAD_det.cpp
│   └── src/
│       ├── config.h
│       ├── cuda_utils.h
│       ├── logging.h
│       ├── macros.h
│       ├── model.cpp
│       ├── model.h
│       ├── postprocess.h
│       └── utils.h
├── efficientnet/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── efficientnet.cpp
│   ├── gen_wts.py
│   ├── logging.h
│   └── utils.hpp
├── ghostnet/
│   ├── README.md
│   ├── ghostnetv1/
│   │   ├── CMakeLists.txt
│   │   ├── gen_wts.py
│   │   ├── ghostnetv1.cpp
│   │   └── logging.h
│   └── ghostnetv2/
│       ├── CMakeLists.txt
│       ├── gen_wts.py
│       ├── ghostnetv2.cpp
│       └── logging.h
├── googlenet/
│   ├── CMakeLists.txt
│   ├── FindTensorRT.cmake
│   ├── README.md
│   ├── gen_wts.py
│   ├── googlenet.cpp
│   ├── logging.h
│   ├── macros.h
│   └── utils.h
├── hrnet/
│   ├── hrnet-image-classification/
│   │   ├── CMakeLists.txt
│   │   ├── README.md
│   │   ├── common.hpp
│   │   ├── demo.py
│   │   ├── hrnet.cpp
│   │   └── logging.h
│   └── hrnet-semantic-segmentation/
│       ├── CMakeLists.txt
│       ├── README.md
│       ├── common.hpp
│       ├── gen_wts.py
│       ├── hrnet.cpp
│       ├── hrnet_ocr.cpp
│       ├── hrnet_trt.py
│       └── logging.h
├── ibnnet/
│   ├── CMakeLists.txt
│   ├── InferenceEngine.cpp
│   ├── InferenceEngine.h
│   ├── README.md
│   ├── gen_wts.py
│   ├── holder.h
│   ├── ibnnet.cpp
│   ├── ibnnet.h
│   ├── layers.cpp
│   ├── layers.h
│   ├── logging.h
│   ├── main.cpp
│   ├── utils.cpp
│   └── utils.h
├── inception/
│   ├── inceptionv3/
│   │   ├── CMakeLists.txt
│   │   ├── README.md
│   │   ├── inception_v3.cpp
│   │   └── logging.h
│   └── inceptionv4/
│       ├── CMakeLists.txt
│       ├── README.md
│       ├── inception_v4.cpp
│       ├── inception_v4.h
│       ├── layers_api.cpp
│       ├── layers_api.h
│       ├── logging.h
│       ├── main.cpp
│       ├── utils.cpp
│       └── utils.h
├── lenet/
│   ├── CMakeLists.txt
│   ├── FindTensorRT.cmake
│   ├── README.md
│   ├── gen_wts.py
│   ├── lenet.cpp
│   ├── lenet.py
│   ├── lenet_tripy.py
│   ├── logging.h
│   ├── macros.h
│   └── utils.h
├── lprnet/
│   ├── CMakeLists.txt
│   ├── FindTensorRT.cmake
│   ├── README.md
│   ├── gen_wts.py
│   ├── logging.h
│   ├── lprnet.cpp
│   ├── macros.h
│   └── utils.h
├── mlp/
│   ├── CMakeLists.txt
│   ├── FindTensorRT.cmake
│   ├── README.md
│   ├── logging.h
│   ├── macros.h
│   ├── mlp.cpp
│   ├── mlp.py
│   └── utils.h
├── mnasnet/
│   ├── CMakeLists.txt
│   ├── FindTensorRT.cmake
│   ├── README.md
│   ├── gen_wts.py
│   ├── logging.h
│   ├── macros.h
│   ├── mnasnet.cpp
│   └── utils.h
├── mobilenet/
│   ├── mobilenetv2/
│   │   ├── CMakeLists.txt
│   │   ├── README.md
│   │   ├── logging.h
│   │   ├── mobilenet_v2.cpp
│   │   └── mobilenet_v2.py
│   └── mobilenetv3/
│       ├── CMakeLists.txt
│       ├── README.md
│       ├── logging.h
│       ├── mobilenet_v3.cpp
│       └── mobilenet_v3.py
├── psenet/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── gen_tf_wts.py
│   ├── layers.cpp
│   ├── layers.h
│   ├── main.cpp
│   ├── psenet.cpp
│   ├── psenet.h
│   ├── utils.cpp
│   └── utils.h
├── rcnn/
│   ├── BatchedNms.cu
│   ├── BatchedNmsPlugin.h
│   ├── CMakeLists.txt
│   ├── MaskRcnnInference.cu
│   ├── MaskRcnnInferencePlugin.h
│   ├── PredictorDecode.cu
│   ├── PredictorDecodePlugin.h
│   ├── README.md
│   ├── RoiAlign.cu
│   ├── RoiAlignPlugin.h
│   ├── RpnDecode.cu
│   ├── RpnDecodePlugin.h
│   ├── RpnNms.cu
│   ├── RpnNmsPlugin.h
│   ├── backbone.hpp
│   ├── calibrator.hpp
│   ├── common.hpp
│   ├── cuda_utils.h
│   ├── gen_wts.py
│   ├── logging.h
│   ├── macros.h
│   └── rcnn.cpp
├── real-esrgan/
│   ├── general-x4v3/
│   │   ├── CMakeLists.txt
│   │   ├── README.md
│   │   ├── cmake/
│   │   │   └── FindTensorRT.cmake
│   │   ├── gen_wts.py
│   │   ├── main.cpp
│   │   └── src/
│   │       ├── include/
│   │       │   ├── config/
│   │       │   │   └── config.hpp
│   │       │   ├── cuda_utils.h
│   │       │   ├── logging/
│   │       │   │   └── logging.h
│   │       │   ├── pixel_shuffle/
│   │       │   │   └── pixel_shuffle.hpp
│   │       │   └── preprocess/
│   │       │       └── preprocess.hpp
│   │       └── pixel_shuffle/
│   │           ├── pixel_shuffle.cpp
│   │           └── pixel_shuffle.cu
│   └── x4plus/
│       ├── CMakeLists.txt
│       ├── README.md
│       ├── common.hpp
│       ├── cuda_utils.h
│       ├── gen_wts.py
│       ├── logging.h
│       ├── macros.h
│       ├── postprocess.cu
│       ├── postprocess.hpp
│       ├── preprocess.cu
│       ├── preprocess.hpp
│       ├── real-esrgan.cpp
│       └── utils.h
├── refinedet/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── calibrator.cpp
│   ├── calibrator.h
│   ├── configure.h
│   ├── gen_wts_refinedet.py
│   ├── logging.h
│   ├── refinedet.cpp
│   └── utils.h
├── repvgg/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── gen_wts.py
│   ├── logging.h
│   └── repvgg.cpp
├── resnet/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── logging.h
│   ├── resnet18.cpp
│   ├── resnet34.cpp
│   ├── resnet50.cpp
│   ├── resnet50.py
│   ├── resnext50_32x4d.cpp
│   ├── wide_resnet50.py
│   └── wideresnet50.cpp
├── retinaface/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── calibrator.cpp
│   ├── calibrator.h
│   ├── common.hpp
│   ├── decode.cu
│   ├── decode.h
│   ├── logging.h
│   ├── macros.h
│   ├── retina_mnet.cpp
│   ├── retina_r50.cpp
│   └── retinaface_trt.py
├── retinafaceAntiCov/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── decode.cu
│   ├── decode.h
│   ├── gen_wts.py
│   ├── logging.h
│   ├── macros.h
│   └── retinafaceAntiCov.cpp
├── scaled-yolov4/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── common.hpp
│   ├── gen_wts.py
│   ├── logging.h
│   ├── mish.cu
│   ├── mish.h
│   ├── utils.h
│   ├── yololayer.cu
│   ├── yololayer.h
│   └── yolov4_csp.cpp
├── senet/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── logging.h
│   └── se_resnet50.cpp
├── shufflenetv2/
│   ├── CMakeLists.txt
│   ├── FindTensorRT.cmake
│   ├── README.md
│   ├── gen_wts.py
│   ├── logging.h
│   ├── macros.h
│   ├── shufflenetv2.cpp
│   └── utils.h
├── squeezenet/
│   ├── CMakeLists.txt
│   ├── FindTensorRT.cmake
│   ├── README.md
│   ├── gen_wts.py
│   ├── logging.h
│   ├── macros.h
│   ├── squeezenet.cpp
│   └── utils.h
├── superpoint/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── gen_wts.py
│   ├── logging.h
│   ├── supernet.cpp
│   ├── utils.cpp
│   └── utils.h
├── swin-transformer/
│   └── semantic-segmentation/
│       ├── CMakeLists.txt
│       ├── README.md
│       ├── UpsampleKernel.cu
│       ├── UpsamplePlugin.cpp
│       ├── UpsamplePlugin.h
│       ├── UpsmapleKernel.h
│       ├── common.hpp
│       ├── fillmask.cu
│       ├── fillmask.h
│       ├── gelu.cu
│       ├── gelu.h
│       ├── gen_wts.py
│       ├── include/
│       │   └── dirent.h
│       ├── layerNorm.cu
│       ├── layerNorm.h
│       ├── logging.h
│       ├── main.cpp
│       ├── myhpp.h
│       ├── trainsform.cpp
│       └── utilsn.h
├── tsm/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── demo.sh
│   ├── gen_wts.py
│   ├── logging.h
│   ├── mmaction2_tsm_r50_config.py
│   ├── test_shift.py
│   ├── tsm_r50.cpp
│   └── tsm_r50.py
├── tutorials/
│   ├── check_fp16_int8_support.md
│   ├── faq.md
│   ├── from_pytorch_to_trt_stepbystep_hrnet.md
│   ├── getting_started.md
│   ├── install.md
│   ├── measure_performance.md
│   ├── migration_guide.md
│   ├── multi_GPU_processing.md
│   └── run_on_windows.md
├── ufld/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── common.hpp
│   ├── gen_wts.py
│   ├── lane_det.cpp
│   ├── logging.h
│   ├── macros.h
│   └── pth2onnx.py
├── unet/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── common.hpp
│   ├── gen_wts.py
│   ├── logging.h
│   ├── macros.h
│   └── unet.cpp
├── vgg/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── logging.h
│   └── vgg11.cpp
├── vit/
│   ├── CMakeLists.txt
│   ├── FindTensorRT.cmake
│   ├── README.md
│   ├── cuda_allocator.cc
│   ├── cuda_allocator.h
│   ├── gen_wts.py
│   ├── logging.h
│   ├── macros.h
│   ├── profiler.cc
│   ├── profiler.h
│   ├── utils.h
│   └── vit.cc
├── yolo11/
│   ├── CMakeLists.txt
│   ├── gen_wts.py
│   ├── include/
│   │   ├── block.h
│   │   ├── calibrator.h
│   │   ├── config.h
│   │   ├── cuda_utils.h
│   │   ├── logging.h
│   │   ├── macros.h
│   │   ├── model.h
│   │   ├── postprocess.h
│   │   ├── preprocess.h
│   │   ├── types.h
│   │   └── utils.h
│   ├── plugin/
│   │   ├── yololayer.cu
│   │   └── yololayer.h
│   ├── readme.md
│   ├── src/
│   │   ├── block.cpp
│   │   ├── calibrator.cpp
│   │   ├── model.cpp
│   │   ├── postprocess.cpp
│   │   ├── postprocess.cu
│   │   └── preprocess.cu
│   ├── yolo11_cls.cpp
│   ├── yolo11_cls_trt.py
│   ├── yolo11_det.cpp
│   ├── yolo11_det_trt.py
│   ├── yolo11_obb.cpp
│   ├── yolo11_obb_trt.py
│   ├── yolo11_pose.cpp
│   ├── yolo11_pose_trt.py
│   ├── yolo11_seg.cpp
│   └── yolo11_seg_trt.py
├── yolo11_tripy/
│   ├── .gitignore
│   ├── README.md
│   ├── classify.py
│   ├── compile_classifier.py
│   ├── constants.py
│   ├── model/
│   │   ├── block.py
│   │   └── model.py
│   └── requirements.txt
├── yolo26/
│   ├── .clang-format
│   ├── .gitignore
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── gen_wts.py
│   ├── include/
│   │   ├── block.h
│   │   ├── config.h
│   │   ├── cuda_utils.h
│   │   ├── logging.h
│   │   ├── macros.h
│   │   ├── model.h
│   │   ├── postprocess.h
│   │   ├── preprocess.h
│   │   ├── types.h
│   │   └── utils.h
│   ├── plugin/
│   │   ├── yololayer.cu
│   │   └── yololayer.h
│   ├── src/
│   │   ├── block.cpp
│   │   ├── model.cpp
│   │   ├── postprocess.cpp
│   │   └── preprocess.cu
│   ├── yolo26_cls.cpp
│   ├── yolo26_det.cpp
│   └── yolo26_obb.cpp
├── yolop/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── common.hpp
│   ├── cuda_utils.h
│   ├── gen_wts.py
│   ├── logging.h
│   ├── macros.h
│   ├── utils.h
│   ├── yololayer.cu
│   ├── yololayer.h
│   ├── yolop.cpp
│   ├── yolop.hpp
│   └── yolop_trt.py
├── yolov10/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── gen_wts.py
│   ├── include/
│   │   ├── block.h
│   │   ├── calibrator.h
│   │   ├── config.h
│   │   ├── cuda_utils.h
│   │   ├── logging.h
│   │   ├── macros.h
│   │   ├── model.h
│   │   ├── postprocess.h
│   │   ├── preprocess.h
│   │   ├── types.h
│   │   └── utils.h
│   ├── plugin/
│   │   ├── yololayer.cu
│   │   └── yololayer.h
│   ├── src/
│   │   ├── block.cpp
│   │   ├── calibrator.cpp
│   │   ├── model.cpp
│   │   ├── postprocess.cpp
│   │   └── preprocess.cu
│   ├── yolov10_det.cpp
│   └── yolov10_det_trt.py
├── yolov12/
│   ├── CMakeLists.txt
│   ├── gen_wts.py
│   ├── include/
│   │   ├── block.h
│   │   ├── config.h
│   │   ├── cuda_utils.h
│   │   ├── logging.h
│   │   ├── macros.h
│   │   ├── model.h
│   │   ├── postprocess.h
│   │   ├── preprocess.h
│   │   ├── types.h
│   │   └── utils.h
│   ├── plugin/
│   │   ├── yololayer.cu
│   │   └── yololayer.h
│   ├── readme.md
│   ├── src/
│   │   ├── block.cpp
│   │   ├── model.cpp
│   │   ├── postprocess.cpp
│   │   ├── postprocess.cu
│   │   └── preprocess.cu
│   └── yolo12_det.cpp
├── yolov12-tubro/
│   ├── CMakeLists.txt
│   ├── gen_wts.py
│   ├── include/
│   │   ├── block.h
│   │   ├── calibrator.h
│   │   ├── config.h
│   │   ├── cuda_utils.h
│   │   ├── logging.h
│   │   ├── macros.h
│   │   ├── model.h
│   │   ├── postprocess.h
│   │   ├── preprocess.h
│   │   ├── types.h
│   │   └── utils.h
│   ├── plugin/
│   │   ├── yololayer.cu
│   │   └── yololayer.h
│   ├── readme.md
│   ├── src/
│   │   ├── block.cpp
│   │   ├── calibrator.cpp
│   │   ├── model.cpp
│   │   ├── postprocess.cpp
│   │   ├── postprocess.cu
│   │   └── preprocess.cu
│   ├── yolov12_cls.cpp
│   ├── yolov12_cls_trt.py
│   ├── yolov12_det.cpp
│   ├── yolov12_det_trt.py
│   ├── yolov12_seg.cpp
│   └── yolov12_seg_trt.py
├── yolov13/
│   ├── CMakeLists.txt
│   ├── gen_wts.py
│   ├── include/
│   │   ├── block.h
│   │   ├── calibrator.h
│   │   ├── config.h
│   │   ├── cuda_utils.h
│   │   ├── logging.h
│   │   ├── macros.h
│   │   ├── model.h
│   │   ├── postprocess.h
│   │   ├── preprocess.h
│   │   ├── types.h
│   │   └── utils.h
│   ├── plugin/
│   │   ├── geluKernel.cu
│   │   ├── yololayer.cu
│   │   └── yololayer.h
│   ├── readme.md
│   ├── src/
│   │   ├── block.cpp
│   │   ├── calibrator.cpp
│   │   ├── model.cpp
│   │   ├── postprocess.cpp
│   │   ├── postprocess.cu
│   │   └── preprocess.cu
│   ├── yolov13_det.cpp
│   └── yolov13_det_trt.py
├── yolov3/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── calibrator.cpp
│   ├── calibrator.h
│   ├── gen_wts.py
│   ├── logging.h
│   ├── macros.h
│   ├── utils.h
│   ├── yololayer.cu
│   ├── yololayer.h
│   ├── yolov3.cpp
│   └── yolov3_trt.py
├── yolov3-spp/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── Utils.h
│   ├── gen_wts.py
│   ├── logging.h
│   ├── yololayer.cu
│   ├── yololayer.h
│   └── yolov3-spp.cpp
├── yolov3-tiny/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── gen_wts.py
│   ├── logging.h
│   ├── macros.h
│   ├── utils.h
│   ├── yololayer.cu
│   ├── yololayer.h
│   └── yolov3-tiny.cpp
├── yolov4/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── gen_wts.py
│   ├── logging.h
│   ├── mish.cu
│   ├── mish.h
│   ├── utils.h
│   ├── yololayer.cu
│   ├── yololayer.h
│   └── yolov4.cpp
├── yolov5/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── gen_wts.py
│   ├── plugin/
│   │   ├── yololayer.cu
│   │   └── yololayer.h
│   ├── src/
│   │   ├── calibrator.cpp
│   │   ├── calibrator.h
│   │   ├── config.h
│   │   ├── cuda_utils.h
│   │   ├── logging.h
│   │   ├── macros.h
│   │   ├── model.cpp
│   │   ├── model.h
│   │   ├── postprocess.cpp
│   │   ├── postprocess.h
│   │   ├── preprocess.cu
│   │   ├── preprocess.h
│   │   ├── types.h
│   │   └── utils.h
│   ├── yolov5_cls.cpp
│   ├── yolov5_cls_trt.py
│   ├── yolov5_det.cpp
│   ├── yolov5_det_cuda_python.py
│   ├── yolov5_det_trt.py
│   ├── yolov5_seg.cpp
│   └── yolov5_seg_trt.py
├── yolov5-lite/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── calibrator.cpp
│   ├── common.hpp
│   ├── gen_wts.py
│   ├── v5lite.cpp
│   ├── yololayer.cu
│   └── yolov5-lite-trt.py
├── yolov7/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── gen_wts.py
│   ├── include/
│   │   ├── block.h
│   │   ├── calibrator.h
│   │   ├── config.h
│   │   ├── cuda_utils.h
│   │   ├── logging.h
│   │   ├── macros.h
│   │   ├── model.h
│   │   ├── postprocess.h
│   │   ├── preprocess.h
│   │   ├── types.h
│   │   └── utils.h
│   ├── main.cpp
│   ├── plugin/
│   │   ├── yololayer.cu
│   │   └── yololayer.h
│   ├── src/
│   │   ├── block.cpp
│   │   ├── calibrator.cpp
│   │   ├── model.cpp
│   │   ├── postprocess.cpp
│   │   └── preprocess.cu
│   └── yolov7_trt.py
├── yolov8/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── gen_wts.py
│   ├── include/
│   │   ├── block.h
│   │   ├── calibrator.h
│   │   ├── config.h
│   │   ├── cuda_utils.h
│   │   ├── logging.h
│   │   ├── macros.h
│   │   ├── model.h
│   │   ├── postprocess.h
│   │   ├── preprocess.h
│   │   ├── types.h
│   │   └── utils.h
│   ├── plugin/
│   │   ├── yololayer.cu
│   │   └── yololayer.h
│   ├── src/
│   │   ├── block.cpp
│   │   ├── calibrator.cpp
│   │   ├── model.cpp
│   │   ├── postprocess.cpp
│   │   ├── postprocess.cu
│   │   └── preprocess.cu
│   ├── yolov8_5u_det.cpp
│   ├── yolov8_5u_det_trt.py
│   ├── yolov8_cls.cpp
│   ├── yolov8_cls_trt.py
│   ├── yolov8_det.cpp
│   ├── yolov8_det_trt.py
│   ├── yolov8_obb.cpp
│   ├── yolov8_obb_trt.py
│   ├── yolov8_pose.cpp
│   ├── yolov8_pose_trt.py
│   ├── yolov8_seg.cpp
│   └── yolov8_seg_trt.py
└── yolov9/
    ├── CMakeLists.txt
    ├── README.md
    ├── demo.cpp
    ├── gen_wts.py
    ├── include/
    │   ├── block.h
    │   ├── calibrator.h
    │   ├── config.h
    │   ├── cuda_utils.h
    │   ├── logging.h
    │   ├── macros.h
    │   ├── model.h
    │   ├── postprocess.h
    │   ├── preprocess.h
    │   ├── types.h
    │   └── utils.h
    ├── plugin/
    │   ├── yololayer.cu
    │   └── yololayer.h
    ├── src/
    │   ├── block.cpp
    │   ├── calibrator.cpp
    │   ├── model.cpp
    │   ├── postprocess.cpp
    │   ├── postprocess.cu
    │   └── preprocess.cu
    ├── windows/
    │   └── dirent.h
    └── yolov9_trt.py

Download .txt

Showing preview only (264K chars total). Download the full file or copy to clipboard to get everything.

SYMBOL INDEX (2883 symbols across 387 files)

FILE: alexnet/alexnet.cc
  function ICudaEngine (line 39) | ICudaEngine* createEngine(int32_t N, IRuntime* runtime, IBuilder* builde...
  function APIToModel (line 163) | void APIToModel(int32_t N, IRuntime* runtime, IHostMemory** modelStream) {
  function doInference (line 187) | std::vector<std::vector<float>> doInference(IExecutionContext& context, ...
  function main (line 257) | int main(int argc, char** argv) {

FILE: alexnet/alexnet.py
  function load_weights (line 24) | def load_weights(file):
  function create_engine (line 48) | def create_engine(max_batch_size, builder, config, dt):
  function API_to_model (line 167) | def API_to_model(max_batch_size):
  class HostDeviceMem (line 180) | class HostDeviceMem(object):
    method __init__ (line 181) | def __init__(self, host_mem, device_mem):
    method __str__ (line 185) | def __str__(self):
    method __repr__ (line 188) | def __repr__(self):
  function allocate_buffers (line 192) | def allocate_buffers(engine):
  function do_inference (line 213) | def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):

FILE: alexnet/gen_wts.py
  function read_imagenet_labels (line 9) | def read_imagenet_labels() -> dict[int, str]:
  function preprocess (line 24) | def preprocess(img: np.array) -> torch.Tensor:

FILE: alexnet/logging.h
  function class (line 34) | class LogStreamConsumerBuffer : public std::stringbuf {
  function class (line 94) | class LogStreamConsumerBase {
  function std (line 140) | static std::string severityPrefix(Severity severity) {
  type TestInfo (line 188) | struct TestInfo
  function TestResult (line 197) | enum class TestResult : std::uint8_t {
  function LogStreamConsumer (line 460) | inline LogStreamConsumer LOG_FATAL(const Logger& logger) {

FILE: alexnet/utils.h
  function cudaDeviceProp (line 32) | cudaDeviceProp prop{}
  function std (line 94) | static std::vector<float> preprocess_img(cv::Mat& img, bool bgr2rgb, con...
  type ScaleParams (line 169) | struct ScaleParams {
  function Weights (line 180) | const Weights scale{DataType::kFLOAT, params->scale.data(), 3ll};
  function getSize (line 237) | static size_t getSize(DataType dt) {

FILE: arcface/arcface-mobilefacenet.cpp
  function loadWeights (line 40) | std::map<std::string, Weights> loadWeights(const std::string file) {
  function IScaleLayer (line 78) | IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::s...
  function ILayer (line 111) | ILayer* addPRelu(INetworkDefinition *network, std::map<std::string, Weig...
  function ILayer (line 149) | ILayer* conv_bn_relu(INetworkDefinition *network, std::map<std::string, ...
  function ILayer (line 163) | ILayer* conv_bn(INetworkDefinition *network, std::map<std::string, Weigh...
  function ILayer (line 175) | ILayer* DepthWise(INetworkDefinition *network, std::map<std::string, Wei...
  function ILayer (line 208) | ILayer* DWResidual(INetworkDefinition *network, std::map<std::string, We...
  function ICudaEngine (line 219) | ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, ...
  function APIToModel (line 277) | void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream) {
  function doInference (line 294) | void doInference(IExecutionContext& context, float* input, float* output...
  function read_files_in_dir (line 327) | int read_files_in_dir(const char *p_dir_name, std::vector<std::string> &...
  function main (line 349) | int main(int argc, char** argv) {

FILE: arcface/arcface-r100.cpp
  function loadWeights (line 40) | std::map<std::string, Weights> loadWeights(const std::string file) {
  function IScaleLayer (line 78) | IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::s...
  function ILayer (line 111) | ILayer* addPRelu(INetworkDefinition *network, std::map<std::string, Weig...
  function ILayer (line 149) | ILayer* resUnit(INetworkDefinition *network, std::map<std::string, Weigh...
  function ICudaEngine (line 178) | ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, ...
  function APIToModel (line 281) | void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream) {
  function doInference (line 298) | void doInference(IExecutionContext& context, float* input, float* output...
  function read_files_in_dir (line 331) | int read_files_in_dir(const char *p_dir_name, std::vector<std::string> &...
  function main (line 353) | int main(int argc, char** argv) {

FILE: arcface/arcface-r50.cpp
  function loadWeights (line 40) | std::map<std::string, Weights> loadWeights(const std::string file) {
  function IScaleLayer (line 78) | IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::s...
  function ILayer (line 111) | ILayer* addPRelu(INetworkDefinition *network, std::map<std::string, Weig...
  function ILayer (line 149) | ILayer* resUnit(INetworkDefinition *network, std::map<std::string, Weigh...
  function ICudaEngine (line 178) | ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, ...
  function APIToModel (line 252) | void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream) {
  function doInference (line 269) | void doInference(IExecutionContext& context, float* input, float* output...
  function read_files_in_dir (line 302) | int read_files_in_dir(const char *p_dir_name, std::vector<std::string> &...
  function main (line 324) | int main(int argc, char** argv) {

FILE: arcface/logging.h
  function class (line 38) | class LogStreamConsumerBuffer : public std::stringbuf
  function class (line 113) | class LogStreamConsumerBase
  function std (line 167) | static std::string severityPrefix(Severity severity)
  function TestResult (line 220) | enum class TestResult
  function LogStreamConsumer (line 454) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
  function LogStreamConsumer (line 466) | inline LogStreamConsumer LOG_INFO(const Logger& logger)
  function LogStreamConsumer (line 478) | inline LogStreamConsumer LOG_WARN(const Logger& logger)
  function LogStreamConsumer (line 490) | inline LogStreamConsumer LOG_ERROR(const Logger& logger)
  function LogStreamConsumer (line 503) | inline LogStreamConsumer LOG_FATAL(const Logger& logger)

FILE: arcface/prelu.h
  function class (line 11) | class PReluPlugin: public IPluginV2IOExt

FILE: centernet/centernet.py
  class ModelData (line 21) | class ModelData(object):
  class Centernet_dla34 (line 28) | class Centernet_dla34(object):
    method __init__ (line 29) | def __init__(self, weights) -> None:
    method add_batchnorm_2d (line 38) | def add_batchnorm_2d(self, input_tensor, parent):
    method add_basic_block (line 51) | def add_basic_block(self, input_tensor, out_channels, residual=None, s...
    method add_level (line 79) | def add_level(self, input_tensor, out_channels, stride=1, dilation=1, ...
    method add_root (line 92) | def add_root(self, input_tensors: list, out_channels, kernel_size=1, r...
    method add_tree (line 111) | def add_tree(self, input_tensor, level, out_channels, residual=None, c...
    method add_base (line 145) | def add_base(self, input_tensor, parent):
    method add_deform_conv (line 171) | def add_deform_conv(self, input_tensor, out_channels, kernel=3, stride...
    method add_ida_up (line 214) | def add_ida_up(self, input_tensors, out_channels, up_f, startp, parent):
    method add_dla_up (line 231) | def add_dla_up(self, input_tensors, first_level, parent):
    method add_head (line 245) | def add_head(self, input_tensor, out_channels, head, head_conv=256, fi...
    method populate_network (line 259) | def populate_network(self):
    method build_engine (line 283) | def build_engine(self):
  function load_random_test_case (line 295) | def load_random_test_case(pagelocked_buffer):
  function main (line 303) | def main(args):

FILE: centernet/dcnv2Plugin/dcnv2Plugin.cpp
  function Dims (line 83) | Dims DeformableConvolutionalLayer::getOutputDimensions(int index, const ...
  function Weights (line 177) | Weights DeformableConvolutionalLayer::copyToDevice(const void* hostData,...
  function Weights (line 191) | Weights DeformableConvolutionalLayer::deserializeToDevice(const char*& h...
  function IPluginV2Ext (line 226) | IPluginV2Ext* DeformableConvolutionalLayer::clone() const
  function DataType (line 245) | DataType DeformableConvolutionalLayer::getOutputDataType(int index, cons...
  function PluginFieldCollection (line 311) | const PluginFieldCollection* DCNv2PluginCreator::getFieldNames()
  function IPluginV2Ext (line 316) | IPluginV2Ext* DCNv2PluginCreator::createPlugin(const char* name, const P...
  function IPluginV2Ext (line 395) | IPluginV2Ext* DCNv2PluginCreator::deserializePlugin(const char* name, co...

FILE: centernet/dcnv2Plugin/dcnv2Plugin.h
  function namespace (line 31) | namespace nvinfer1

FILE: centernet/sample/common.py
  function GiB (line 68) | def GiB(val):
  function add_help (line 72) | def add_help(description):
  function find_sample_data (line 77) | def find_sample_data(description="Runs a TensorRT Python sample", subfol...
  function locate_files (line 111) | def locate_files(data_paths, filenames, err_msg=""):
  class HostDeviceMem (line 142) | class HostDeviceMem(object):
    method __init__ (line 143) | def __init__(self, host_mem, device_mem):
    method __str__ (line 147) | def __str__(self):
    method __repr__ (line 150) | def __repr__(self):
  function allocate_buffers (line 154) | def allocate_buffers(engine):
  function do_inference (line 176) | def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
  function do_inference_v2 (line 190) | def do_inference_v2(context, bindings, inputs, outputs, stream):
  function retry_call (line 204) | def retry_call(func, args=[], kwargs={}, n_retries=3):
  function retry (line 223) | def retry(n_retries=3):

FILE: centernet/sample/test.py
  function _gather_feat (line 16) | def _gather_feat(feat, ind, mask=None):
  function _transpose_and_gather_feat (line 27) | def _transpose_and_gather_feat(feat, ind):
  function pre_process (line 34) | def pre_process(image):
  function _nms (line 44) | def _nms(heat, kernel=3):
  function _topk (line 53) | def _topk(scores, K=40):
  function ctdet_decode (line 72) | def ctdet_decode(heat, wh, reg=None, cat_spec_wh=False, K=100):

FILE: convnextv2/gen_wts.py
  function gen_wts (line 5) | def gen_wts(model_path, wts_path):

FILE: convnextv2/inference.py
  function load_imagenet_labels (line 11) | def load_imagenet_labels(label_file="imagenet_classes.txt"):
  function main (line 20) | def main(engine_path, img_path, label_file="imagenet_classes.txt"):

FILE: convnextv2/src/LayerNormPlugin.h
  function getNbOutputs (line 19) | int32_t getNbOutputs() const noexcept override;

FILE: convnextv2/src/convnextv2.cpp
  type ConvNextConfig (line 17) | struct ConvNextConfig {
  function ConvNextConfig (line 25) | ConvNextConfig loadConfig(const std::string& configPath) {
  function loadWeights (line 111) | std::map<std::string, Weights> loadWeights(const std::string& file) {
  function IScaleLayer (line 136) | IScaleLayer* addBatchNorm2d(INetworkDefinition* network, ITensor& input,...
  function ITensor (line 162) | ITensor* convNextBlock(INetworkDefinition* network, ITensor* input, int ...
  function ICudaEngine (line 303) | ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, ...
  function APIToModel (line 458) | void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream) {
  function inference (line 469) | void inference(const std::string& engine_file, const std::string& image_...
  function main (line 564) | int main(int argc, char** argv) {

FILE: convnextv2/src/inference_cpp.cpp
  function load_imagenet_labels (line 14) | std::vector<std::string> load_imagenet_labels(const std::string& label_f...
  function inference (line 30) | void inference(const std::string& engine_file, const std::string& image_...
  function main (line 171) | int main(int argc, char** argv) {

FILE: convnextv2/src/logging.h
  function class (line 9) | class Logger : public ILogger {

FILE: crnn/crnn.cpp
  function strDecode (line 40) | std::string strDecode(std::vector<int>& preds, bool raw) {
  function loadWeights (line 57) | std::map<std::string, Weights> loadWeights(const std::string file) {
  function IScaleLayer (line 95) | IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::s...
  function ILayer (line 128) | ILayer* convRelu(INetworkDefinition *network, std::map<std::string, Weig...
  function splitLstmWeights (line 143) | void splitLstmWeights(std::map<std::string, Weights>& weightMap, std::st...
  function ILayer (line 155) | ILayer* addLSTM(INetworkDefinition *network, std::map<std::string, Weigh...
  function ICudaEngine (line 211) | ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, ...
  function APIToModel (line 284) | void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream) {
  function doInference (line 301) | void doInference(IExecutionContext& context, cudaStream_t& stream, void ...
  function main (line 309) | int main(int argc, char** argv) {

FILE: crnn/logging.h
  function class (line 31) | class LogStreamConsumerBuffer : public std::stringbuf
  function class (line 106) | class LogStreamConsumerBase
  function std (line 160) | static std::string severityPrefix(Severity severity)
  function TestResult (line 213) | enum class TestResult
  function LogStreamConsumer (line 447) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
  function LogStreamConsumer (line 459) | inline LogStreamConsumer LOG_INFO(const Logger& logger)
  function LogStreamConsumer (line 471) | inline LogStreamConsumer LOG_WARN(const Logger& logger)
  function LogStreamConsumer (line 483) | inline LogStreamConsumer LOG_ERROR(const Logger& logger)
  function LogStreamConsumer (line 496) | inline LogStreamConsumer LOG_FATAL(const Logger& logger)

FILE: csrnet/csrnet.cpp
  function loadWeights (line 27) | std::map<std::string, Weights> loadWeights(const std::string file) {
  function doInference (line 106) | void doInference(IExecutionContext &context, float *input, float *output,
  function ICudaEngine (line 153) | ICudaEngine *createEngine(unsigned int maxBatchSize, IBuilder *builder,
  function APIToModel (line 377) | void APIToModel(unsigned int maxBatchSize, IHostMemory **modelStream) {
  function read_files_in_dir (line 397) | int read_files_in_dir(const char *p_dir_name,
  function main (line 415) | int main(int argc, char **argv) {

FILE: csrnet/logging.h
  function class (line 32) | class LogStreamConsumerBuffer : public std::stringbuf {
  function class (line 101) | class LogStreamConsumerBase {
  function std (line 154) | static std::string severityPrefix(Severity severity) {
  function TestResult (line 215) | enum class TestResult {
  function LogStreamConsumer (line 446) | inline LogStreamConsumer LOG_VERBOSE(const Logger &logger) {
  function LogStreamConsumer (line 458) | inline LogStreamConsumer LOG_INFO(const Logger &logger) {
  function LogStreamConsumer (line 470) | inline LogStreamConsumer LOG_WARN(const Logger &logger) {
  function LogStreamConsumer (line 482) | inline LogStreamConsumer LOG_ERROR(const Logger &logger) {
  function LogStreamConsumer (line 495) | inline LogStreamConsumer LOG_FATAL(const Logger &logger) {

FILE: dbnet/clipper/clipper.cpp
  type ClipperLib (line 51) | namespace ClipperLib {
    type Direction (line 57) | enum Direction { dRightToLeft, dLeftToRight }
    type TEdge (line 66) | struct TEdge {
    type IntersectNode (line 86) | struct IntersectNode {
    type LocalMinimum (line 92) | struct LocalMinimum {
    type OutPt (line 98) | struct OutPt
    type OutRec (line 102) | struct OutRec {
    type OutPt (line 112) | struct OutPt {
    type Join (line 119) | struct Join {
    type LocMinSorter (line 125) | struct LocMinSorter
    function cInt (line 136) | inline cInt Round(double val)
    function cInt (line 143) | inline cInt Abs(cInt val)
    function PolyNode (line 161) | PolyNode* PolyTree::GetFirst() const
    function PolyNode (line 202) | PolyNode* PolyNode::GetNext() const
    function PolyNode (line 211) | PolyNode* PolyNode::GetNextSiblingUp() const
    class Int128 (line 251) | class Int128
      method Int128 (line 257) | Int128(long64 _lo = 0)
      method Int128 (line 264) | Int128(const Int128 &val): lo(val.lo), hi(val.hi){}
      method Int128 (line 266) | Int128(const long64& _hi, const ulong64& _lo): lo(_lo), hi(_hi){}
      method Int128 (line 268) | Int128& operator = (const long64 &val)
      method Int128 (line 303) | Int128& operator += (const Int128 &rhs)
      method Int128 (line 311) | Int128 operator + (const Int128 &rhs) const
      method Int128 (line 318) | Int128& operator -= (const Int128 &rhs)
      method Int128 (line 324) | Int128 operator - (const Int128 &rhs) const
      method Int128 (line 331) | Int128 operator-() const //unary negation
    function Int128 (line 354) | Int128 Int128Mul (long64 lhs, long64 rhs)
      method Int128 (line 257) | Int128(long64 _lo = 0)
      method Int128 (line 264) | Int128(const Int128 &val): lo(val.lo), hi(val.hi){}
      method Int128 (line 266) | Int128(const long64& _hi, const ulong64& _lo): lo(_lo), hi(_hi){}
      method Int128 (line 268) | Int128& operator = (const long64 &val)
      method Int128 (line 303) | Int128& operator += (const Int128 &rhs)
      method Int128 (line 311) | Int128 operator + (const Int128 &rhs) const
      method Int128 (line 318) | Int128& operator -= (const Int128 &rhs)
      method Int128 (line 324) | Int128 operator - (const Int128 &rhs) const
      method Int128 (line 331) | Int128 operator-() const //unary negation
    function Orientation (line 385) | bool Orientation(const Path &poly)
    function Area (line 391) | double Area(const Path &poly)
    function Area (line 406) | double Area(const OutPt *op)
    function Area (line 419) | double Area(const OutRec &outRec)
    function PointIsVertex (line 425) | bool PointIsVertex(const IntPoint &Pt, OutPt *pp)
    function PointInPolygon (line 440) | int PointInPolygon(const IntPoint &pt, const Path &path)
    function PointInPolygon (line 484) | int PointInPolygon (const IntPoint &pt, OutPt *op)
    function Poly2ContainsPoly1 (line 526) | bool Poly2ContainsPoly1(OutPt *OutPt1, OutPt *OutPt2)
    function SlopesEqual (line 541) | bool SlopesEqual(const TEdge &e1, const TEdge &e2, bool UseFullInt64Ra...
    function SlopesEqual (line 554) | bool SlopesEqual(const IntPoint pt1, const IntPoint pt2,
    function SlopesEqual (line 566) | bool SlopesEqual(const IntPoint pt1, const IntPoint pt2,
    function IsHorizontal (line 578) | inline bool IsHorizontal(TEdge &e)
    function GetDx (line 584) | inline double GetDx(const IntPoint pt1, const IntPoint pt2)
    function SetDx (line 591) | inline void SetDx(TEdge &e)
    function SwapSides (line 599) | inline void SwapSides(TEdge &Edge1, TEdge &Edge2)
    function SwapPolyIndexes (line 607) | inline void SwapPolyIndexes(TEdge &Edge1, TEdge &Edge2)
    function cInt (line 615) | inline cInt TopX(TEdge &edge, const cInt currentY)
    function IntersectPoint (line 622) | void IntersectPoint(TEdge &Edge1, TEdge &Edge2, IntPoint &ip)
    function ReversePolyPtLinks (line 692) | void ReversePolyPtLinks(OutPt *pp)
    function DisposeOutPts (line 706) | void DisposeOutPts(OutPt*& pp)
    function InitEdge (line 719) | inline void InitEdge(TEdge* e, TEdge* eNext, TEdge* ePrev, const IntPo...
    function InitEdge2 (line 729) | void InitEdge2(TEdge& e, PolyType Pt)
    function TEdge (line 745) | TEdge* RemoveEdge(TEdge* e)
    function ReverseHorizontal (line 756) | inline void ReverseHorizontal(TEdge &e)
    function SwapPoints (line 768) | void SwapPoints(IntPoint &pt1, IntPoint &pt2)
    function GetOverlapSegment (line 776) | bool GetOverlapSegment(IntPoint pt1a, IntPoint pt1b, IntPoint pt2a,
    function FirstIsBottomPt (line 798) | bool FirstIsBottomPt(const OutPt* btmPt1, const OutPt* btmPt2)
    function OutPt (line 822) | OutPt* GetBottomPt(OutPt *pp)
    function Pt2IsBetweenPt1AndPt3 (line 860) | bool Pt2IsBetweenPt1AndPt3(const IntPoint pt1,
    function HorzSegmentsOverlap (line 872) | bool HorzSegmentsOverlap(cInt seg1a, cInt seg1b, cInt seg2a, cInt seg2b)
    function RangeTest (line 896) | void RangeTest(const IntPoint& Pt, bool& useFullRange)
    function TEdge (line 911) | TEdge* FindNextLocMin(TEdge* E)
    function TEdge (line 928) | TEdge* ClipperBase::ProcessBound(TEdge* E, bool NextIsForward)
    function IntRect (line 1295) | IntRect ClipperBase::GetBounds()
    function OutRec (line 1380) | OutRec* ClipperBase::CreateOutRec()
    function OutPt (line 1841) | OutPt* Clipper::AddLocalMinPoly(TEdge *e1, TEdge *e2, const IntPoint &Pt)
    function OutRec (line 2327) | OutRec* GetLowermostRec(OutRec *outRec1, OutRec *outRec2)
    function OutRec1RightOfOutRec2 (line 2347) | bool OutRec1RightOfOutRec2(OutRec* outRec1, OutRec* outRec2)
    function OutRec (line 2358) | OutRec* Clipper::GetOutRec(int Idx)
    function OutPt (line 2463) | OutPt* Clipper::AddOutPt(TEdge *e, const IntPoint &pt)
    function OutPt (line 2502) | OutPt* Clipper::GetLastOutPt(TEdge *e)
    function IsMinima (line 2520) | inline bool IsMinima(TEdge *e)
    function IsMaxima (line 2526) | inline bool IsMaxima(TEdge *e, const cInt Y)
    function IsIntermediate (line 2532) | inline bool IsIntermediate(TEdge *e, const cInt Y)
    function TEdge (line 2538) | TEdge *GetMaximaPair(TEdge *e)
    function TEdge (line 2548) | TEdge *GetMaximaPairEx(TEdge *e)
    function TEdge (line 2604) | TEdge* GetNextInAEL(TEdge *e, Direction dir)
    function GetHorzDirection (line 2610) | void GetHorzDirection(TEdge& HorzEdge, Direction& Dir, cInt& Left, cIn...
    function IntersectListSort (line 2921) | bool IntersectListSort(IntersectNode* node1, IntersectNode* node2)
    function EdgesAdjacent (line 2927) | inline bool EdgesAdjacent(const IntersectNode &inode)
    function PointCount (line 3184) | int PointCount(OutPt *Pts)
    function SwapIntersectNodes (line 3265) | void SwapIntersectNodes(IntersectNode &int1, IntersectNode &int2)
    function E2InsertsBeforeE1 (line 3278) | inline bool E2InsertsBeforeE1(TEdge &e1, TEdge &e2)
    function GetOverlap (line 3290) | bool GetOverlap(const cInt a1, const cInt a2, const cInt b1, const cIn...
    function UpdateOutPtIdxs (line 3307) | inline void UpdateOutPtIdxs(OutRec& outrec)
    function OutPt (line 3348) | OutPt* DupOutPt(OutPt* outPt, bool InsertAfter)
    function JoinHorz (line 3371) | bool JoinHorz(OutPt* op1, OutPt* op1b, OutPt* op2, OutPt* op2b,
    function OutRec (line 3617) | static OutRec* ParseFirstLeft(OutRec* FirstLeft)
    function DoublePoint (line 3769) | DoublePoint GetUnitNormal(const IntPoint &pt1, const IntPoint &pt2)
    function ReversePath (line 4283) | void ReversePath(Path& p)
    function ReversePaths (line 4289) | void ReversePaths(Paths& p)
    function SimplifyPolygon (line 4296) | void SimplifyPolygon(const Path &in_poly, Paths &out_polys, PolyFillTy...
    function SimplifyPolygons (line 4305) | void SimplifyPolygons(const Paths &in_polys, Paths &out_polys, PolyFil...
    function SimplifyPolygons (line 4314) | void SimplifyPolygons(Paths &polys, PolyFillType fillType)
    function DistanceSqrd (line 4320) | inline double DistanceSqrd(const IntPoint& pt1, const IntPoint& pt2)
    function DistanceFromLineSqrd (line 4328) | double DistanceFromLineSqrd(
    function SlopesNearCollinear (line 4345) | bool SlopesNearCollinear(const IntPoint& pt1,
    function PointsAreClose (line 4372) | bool PointsAreClose(IntPoint pt1, IntPoint pt2, double distSqrd)
    function OutPt (line 4380) | OutPt* ExcludeOp(OutPt* op)
    function CleanPolygon (line 4390) | void CleanPolygon(const Path& in_poly, Path& out_poly, double distance)
    function CleanPolygon (line 4450) | void CleanPolygon(Path& poly, double distance)
    function CleanPolygons (line 4456) | void CleanPolygons(const Paths& in_polys, Paths& out_polys, double dis...
    function CleanPolygons (line 4464) | void CleanPolygons(Paths& polys, double distance)
    function Minkowski (line 4470) | void Minkowski(const Path& poly, const Path& path,
    function MinkowskiSum (line 4514) | void MinkowskiSum(const Path& pattern, const Path& path, Paths& soluti...
    function TranslatePath (line 4523) | void TranslatePath(const Path& input, Path& output, const IntPoint delta)
    function MinkowskiSum (line 4532) | void MinkowskiSum(const Path& pattern, const Paths& paths, Paths& solu...
    function MinkowskiDiff (line 4551) | void MinkowskiDiff(const Path& poly1, const Path& poly2, Paths& solution)
    type NodeType (line 4560) | enum NodeType {ntAny, ntOpen, ntClosed}
    function AddPolyNodeToPaths (line 4562) | void AddPolyNodeToPaths(const PolyNode& polynode, NodeType nodetype, P...
    function PolyTreeToPaths (line 4575) | void PolyTreeToPaths(const PolyTree& polytree, Paths& paths)
    function ClosedPathsFromPolyTree (line 4583) | void ClosedPathsFromPolyTree(const PolyTree& polytree, Paths& paths)
    function OpenPathsFromPolyTree (line 4591) | void OpenPathsFromPolyTree(PolyTree& polytree, Paths& paths)

FILE: dbnet/clipper/clipper.hpp
  type ClipperLib (line 62) | namespace ClipperLib {
    type ClipType (line 64) | enum ClipType { ctIntersection, ctUnion, ctDifference, ctXor }
    type PolyType (line 65) | enum PolyType { ptSubject, ptClip }
    type PolyFillType (line 70) | enum PolyFillType { pftEvenOdd, pftNonZero, pftPositive, pftNegative }
    type IntPoint (line 85) | struct IntPoint {
      method IntPoint (line 90) | IntPoint(cInt x = 0, cInt y = 0, cInt z = 0): X(x), Y(y), Z(z) {}
      method IntPoint (line 92) | IntPoint(cInt x = 0, cInt y = 0): X(x), Y(y) {}
    function Path (line 109) | inline Path& operator <<(Path& poly, const IntPoint& p) {poly.push_bac...
    function Paths (line 110) | inline Paths& operator <<(Paths& polys, const Path& p) {polys.push_bac...
    type DoublePoint (line 116) | struct DoublePoint
      method DoublePoint (line 120) | DoublePoint(double x = 0, double y = 0) : X(x), Y(y) {}
      method DoublePoint (line 121) | DoublePoint(IntPoint ip) : X((double)ip.X), Y((double)ip.Y) {}
    type InitOptions (line 129) | enum InitOptions {ioReverseSolution = 1, ioStrictlySimple = 2, ioPrese...
    type JoinType (line 130) | enum JoinType {jtSquare, jtRound, jtMiter}
    type EndType (line 131) | enum EndType {etClosedPolygon, etClosedLine, etOpenButt, etOpenSquare,...
    class PolyNode (line 133) | class PolyNode
    class PolyNode (line 136) | class PolyNode
    class PolyTree (line 160) | class PolyTree: public PolyNode
    type IntRect (line 197) | struct IntRect { cInt left; cInt top; cInt right; cInt bottom; }
    type EdgeSide (line 200) | enum EdgeSide { esLeft = 1, esRight = 2}
    type TEdge (line 203) | struct TEdge
    type IntersectNode (line 204) | struct IntersectNode
    type LocalMinimum (line 205) | struct LocalMinimum
    type OutPt (line 206) | struct OutPt
    type OutRec (line 207) | struct OutRec
    type Join (line 208) | struct Join
    class ClipperBase (line 220) | class ClipperBase
      method PreserveCollinear (line 229) | bool PreserveCollinear() {return m_PreserveCollinear;}
      method PreserveCollinear (line 230) | void PreserveCollinear(bool value) {m_PreserveCollinear = value;}
    class Clipper (line 263) | class Clipper : public virtual ClipperBase
      method ReverseSolution (line 281) | bool ReverseSolution() { return m_ReverseOutput; }
      method ReverseSolution (line 282) | void ReverseSolution(bool value) {m_ReverseOutput = value;}
      method StrictlySimple (line 283) | bool StrictlySimple() {return m_StrictSimple;}
      method StrictlySimple (line 284) | void StrictlySimple(bool value) {m_StrictSimple = value;}
    class ClipperOffset (line 360) | class ClipperOffset
    class clipperException (line 391) | class clipperException : public std::exception
      method clipperException (line 394) | clipperException(const char* description): m_descr(description) {}

FILE: dbnet/common.hpp
  function loadWeights (line 29) | std::map<std::string, Weights> loadWeights(const std::string file) {
  function IScaleLayer (line 65) | IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::s...
  function ILayer (line 98) | ILayer* convBnLeaky(INetworkDefinition *network, std::map<std::string, W...
  function IActivationLayer (line 120) | IActivationLayer* basicBlock(INetworkDefinition *network, std::map<std::...
  function read_files_in_dir (line 155) | int read_files_in_dir(const char *p_dir_name, std::vector<std::string> &...

FILE: dbnet/dbnet.cpp
  function expandBox (line 25) | cv::RotatedRect expandBox(cv::Point2f temp[], float ratio)
  function paddimg (line 56) | float paddimg(cv::Mat& In_Out_img, int shortsize = 960) {
  function ICudaEngine (line 83) | ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, ...
  function APIToModel (line 263) | void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream) {
  function doInference (line 281) | void doInference(IExecutionContext& context, float* input, float* output...
  function get_mini_boxes (line 315) | bool get_mini_boxes(cv::RotatedRect& rotated_rect, cv::Point2f rect[],
  function get_box_score (line 363) | float get_box_score(float* map, cv::Point2f rect[], int width, int height,
  function main (line 400) | int main(int argc, char** argv) {

FILE: dbnet/logging.h
  function class (line 31) | class LogStreamConsumerBuffer : public std::stringbuf
  function class (line 106) | class LogStreamConsumerBase
  function std (line 160) | static std::string severityPrefix(Severity severity)
  function TestResult (line 213) | enum class TestResult
  function LogStreamConsumer (line 447) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
  function LogStreamConsumer (line 459) | inline LogStreamConsumer LOG_INFO(const Logger& logger)
  function LogStreamConsumer (line 471) | inline LogStreamConsumer LOG_WARN(const Logger& logger)
  function LogStreamConsumer (line 483) | inline LogStreamConsumer LOG_ERROR(const Logger& logger)
  function LogStreamConsumer (line 496) | inline LogStreamConsumer LOG_FATAL(const Logger& logger)

FILE: dbnet/utils.h
  function namespace (line 22) | namespace Tn

FILE: densenet/densenet121.cpp
  function loadWeights (line 38) | std::map<std::string, Weights> loadWeights(const std::string file)
  function IScaleLayer (line 77) | IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::s...
  function IConvolutionLayer (line 111) | IConvolutionLayer* addDenseLayer(INetworkDefinition* network, ITensor* i...
  function IPoolingLayer (line 142) | IPoolingLayer* addTransition(INetworkDefinition* network, ITensor& input...
  function IConcatenationLayer (line 167) | IConcatenationLayer* addDenseBlock(INetworkDefinition* network, ITensor*...
  function ICudaEngine (line 194) | ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, ...
  function APIToModel (line 267) | void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream)
  function doInference (line 290) | void doInference(IExecutionContext& context, float* input, float* output...
  function main (line 324) | int main(int argc, char** argv)

FILE: densenet/densenet121.py
  function load_weights (line 26) | def load_weights(file):
  function add_batch_norm_2d (line 50) | def add_batch_norm_2d(network, weight_map, input, layer_name):
  function add_dense_layer (line 65) | def add_dense_layer(network, input, weight_map, lname):
  function add_transition (line 96) | def add_transition(network, input, weight_map, outch, lname):
  function add_dense_block (line 120) | def add_dense_block(network, input, weight_map, num_dense_layers, lname):
  function create_engine (line 137) | def create_engine(max_batch_size, builder, config, dt):
  function API_to_model (line 201) | def API_to_model(max_batch_size):
  class HostDeviceMem (line 214) | class HostDeviceMem(object):
    method __init__ (line 215) | def __init__(self, host_mem, device_mem):
    method __str__ (line 219) | def __str__(self):
    method __repr__ (line 222) | def __repr__(self):
  function allocate_buffers (line 226) | def allocate_buffers(engine):
  function do_inference (line 247) | def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):

FILE: densenet/logging.h
  function class (line 31) | class LogStreamConsumerBuffer : public std::stringbuf
  function class (line 108) | class LogStreamConsumerBase
  function std (line 162) | static std::string severityPrefix(Severity severity)
  function TestResult (line 215) | enum class TestResult
  function LogStreamConsumer (line 451) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
  function LogStreamConsumer (line 463) | inline LogStreamConsumer LOG_INFO(const Logger& logger)
  function LogStreamConsumer (line 475) | inline LogStreamConsumer LOG_WARN(const Logger& logger)
  function LogStreamConsumer (line 487) | inline LogStreamConsumer LOG_ERROR(const Logger& logger)
  function LogStreamConsumer (line 500) | inline LogStreamConsumer LOG_FATAL(const Logger& logger)

FILE: detr/backbone.hpp
  type RESNETTYPE (line 5) | enum RESNETTYPE {
  function IScaleLayer (line 21) | IScaleLayer* addBatchNorm2d(
  function ILayer (line 60) | ILayer* BasicStem(
  function ITensor (line 94) | ITensor* BasicBlock(
  function ITensor (line 153) | ITensor* BottleneckBlock(
  function ITensor (line 246) | ITensor* MakeStage(
  function ITensor (line 283) | ITensor* BuildResNet(

FILE: detr/calibrator.hpp
  class Int8EntropyCalibrator2 (line 18) | class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {

FILE: detr/common.hpp
  function loadWeights (line 18) | void loadWeights(const std::string file, std::unordered_map<std::string,...
  function CalculateSize (line 51) | int CalculateSize(Dims a) {
  function read_files_in_dir (line 59) | static inline int read_files_in_dir(const char *p_dir_name, std::vector<...
  function preprocessImg (line 81) | void preprocessImg(cv::Mat& img, int newh, int neww) {

FILE: detr/detr.cpp
  function ITensor (line 31) | ITensor* PositionEmbeddingSine(
  function ITensor (line 111) | ITensor* MultiHeadAttention(
  function ITensor (line 209) | ITensor* LayerNorm(
  function ITensor (line 262) | ITensor* TransformerEncoderLayer(
  function ITensor (line 314) | ITensor* TransformerEncoder(
  function ITensor (line 330) | ITensor* TransformerDecoderLayer(
  function ITensor (line 401) | ITensor* TransformerDecoder(
  function ITensor (line 433) | ITensor* Transformer(
  function ITensor (line 475) | ITensor* MLP(
  function Predict (line 510) | std::vector<ITensor*> Predict(
  function ICudaEngine (line 531) | ICudaEngine* createEngine_r50detr(
  function BuildDETRModel (line 619) | void BuildDETRModel(unsigned int maxBatchSize, IHostMemory** modelStream,
  function doInference (line 638) | void doInference(IExecutionContext& context, cudaStream_t& stream, std::...
  function parse_args (line 653) | bool parse_args(int argc, char** argv, std::string& wtsFile, std::string...
  function main (line 667) | int main(int argc, char** argv) {

FILE: detr/gen_wts.py
  function box_cxcywh_to_xyxy (line 12) | def box_cxcywh_to_xyxy(x):
  function build_backbone (line 18) | def build_backbone():
  function gen_wts (line 28) | def gen_wts(model, filename):
  function main (line 63) | def main():

FILE: detr/logging.h
  function class (line 34) | class LogStreamConsumerBuffer : public std::stringbuf {
  function class (line 98) | class LogStreamConsumerBase {
  function std (line 142) | static std::string severityPrefix(Severity severity) {
  function TestResult (line 190) | enum class TestResult {
  function LogStreamConsumer (line 399) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) {
  function LogStreamConsumer (line 410) | inline LogStreamConsumer LOG_INFO(const Logger& logger) {
  function LogStreamConsumer (line 421) | inline LogStreamConsumer LOG_WARN(const Logger& logger) {
  function LogStreamConsumer (line 432) | inline LogStreamConsumer LOG_ERROR(const Logger& logger) {
  function LogStreamConsumer (line 444) | inline LogStreamConsumer LOG_FATAL(const Logger& logger) {

FILE: efficient_ad/efficientAD_det.cpp
  function parse_args (line 23) | bool parse_args(int argc, char** argv, std::string& wts, std::string& en...
  function prepare_infer_buffers (line 39) | void prepare_infer_buffers(ICudaEngine* engine, float** gpu_input_buffer...
  function preprocessImg (line 59) | void preprocessImg(cv::Mat& img, int newh, int neww) {
  function infer (line 69) | void infer(IExecutionContext& context, cudaStream_t& stream, std::vector...
  function serialize_engine (line 83) | void serialize_engine(unsigned int max_batchsize, float& gd, float& gw, ...
  function deserialize_engine (line 113) | void deserialize_engine(std::string& engine_name, IRuntime** runtime, IC...
  function main (line 139) | int main(int argc, char** argv) {

FILE: efficient_ad/src/logging.h
  function class (line 32) | class LogStreamConsumerBuffer : public std::stringbuf {
  function class (line 91) | class LogStreamConsumerBase {
  function std (line 137) | static std::string severityPrefix(Severity severity) {
  function TestResult (line 191) | enum class TestResult {
  function LogStreamConsumer (line 405) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) {
  function LogStreamConsumer (line 416) | inline LogStreamConsumer LOG_INFO(const Logger& logger) {
  function LogStreamConsumer (line 427) | inline LogStreamConsumer LOG_WARN(const Logger& logger) {
  function LogStreamConsumer (line 438) | inline LogStreamConsumer LOG_ERROR(const Logger& logger) {
  function LogStreamConsumer (line 450) | inline LogStreamConsumer LOG_FATAL(const Logger& logger) {

FILE: efficient_ad/src/model.cpp
  function loadWeights (line 19) | static std::map<std::string, Weights> loadWeights(const std::string file) {
  function printNetworkLayers (line 55) | void printNetworkLayers(INetworkDefinition* network) {
  function IScaleLayer (line 116) | static IScaleLayer* NormalizeInput(INetworkDefinition* network, ITensor&...
  function IScaleLayer (line 128) | static IScaleLayer* NormalizeTeacherMap(INetworkDefinition* network, std...
  function ILayer (line 157) | static ILayer* NormalizeFinalMap(INetworkDefinition* network, std::map<s...
  function ILayer (line 208) | static ILayer* convRelu(INetworkDefinition* network, std::map<std::strin...
  function IResizeLayer (line 229) | static IResizeLayer* interpolate(INetworkDefinition* network, ITensor& i...
  function ILayer (line 239) | static ILayer* interpConvRelu(INetworkDefinition* network, std::map<std:...
  function IPoolingLayer (line 260) | static IPoolingLayer* avgPool2d(INetworkDefinition* network, ITensor& in...
  function slice (line 269) | static void slice(INetworkDefinition* network, ITensor& input, std::vect...
  function IElementWiseLayer (line 283) | static IElementWiseLayer* mergeMap(INetworkDefinition* network, ITensor&...
  function ICudaEngine (line 303) | ICudaEngine* build_efficientAD_engine(unsigned int maxBatchSize, IBuilde...

FILE: efficient_ad/src/postprocess.h
  function genHeatMap (line 5) | void genHeatMap(cv::Mat originImg, cv::Mat& anomalyGrayMap, cv::Mat& Hea...

FILE: efficient_ad/src/utils.h
  function read_files_in_dir (line 11) | static inline int read_files_in_dir(const char* p_dir_name, std::vector<...

FILE: efficientnet/efficientnet.cpp
  function ICudaEngine (line 46) | ICudaEngine *createEngine(unsigned int maxBatchSize, IBuilder *builder, ...
  function APIToModel (line 122) | void APIToModel(unsigned int maxBatchSize, IHostMemory **modelStream, st...
  function doInference (line 140) | void doInference(IExecutionContext &context, float *input, float *output...
  function parse_args (line 174) | bool parse_args(int argc, char **argv, std::string &wts, std::string &en...
  function main (line 194) | int main(int argc, char **argv)

FILE: efficientnet/logging.h
  function class (line 31) | class LogStreamConsumerBuffer : public std::stringbuf
  function class (line 106) | class LogStreamConsumerBase
  function std (line 160) | static std::string severityPrefix(Severity severity)
  function TestResult (line 213) | enum class TestResult
  function LogStreamConsumer (line 447) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
  function LogStreamConsumer (line 459) | inline LogStreamConsumer LOG_INFO(const Logger& logger)
  function LogStreamConsumer (line 471) | inline LogStreamConsumer LOG_WARN(const Logger& logger)
  function LogStreamConsumer (line 483) | inline LogStreamConsumer LOG_ERROR(const Logger& logger)
  function LogStreamConsumer (line 496) | inline LogStreamConsumer LOG_FATAL(const Logger& logger)

FILE: efficientnet/utils.hpp
  function loadWeights (line 23) | std::map<std::string, Weights> loadWeights(const std::string file)
  type BlockArgs (line 62) | struct BlockArgs
  type GlobalParams (line 74) | struct GlobalParams
  function roundFilters (line 86) | int roundFilters(int filters, GlobalParams global_params)
  function DimsHW (line 103) | DimsHW calculateOutputImageSize(DimsHW image_size, int stride)
  function roundRepeats (line 110) | int roundRepeats(int repeats, GlobalParams global_params)
  function IScaleLayer (line 118) | IScaleLayer *addBatchNorm2d(INetworkDefinition *network, std::map<std::s...
  function IConvolutionLayer (line 154) | IConvolutionLayer *addSamePaddingConv2d(INetworkDefinition *network, std...
  function ILayer (line 191) | ILayer *addSwish(INetworkDefinition *network, ITensor &input)
  function ITensor (line 199) | ITensor *MBConvBlock(INetworkDefinition *network, std::map<std::string, ...

FILE: ghostnet/ghostnetv1/gen_wts.py
  function _make_divisible (line 16) | def _make_divisible(v, divisor, min_value=None):
  function hard_sigmoid (line 32) | def hard_sigmoid(x, inplace: bool = False):
  class SqueezeExcite (line 39) | class SqueezeExcite(nn.Module):
    method __init__ (line 40) | def __init__(self, in_chs, se_ratio=0.25, reduced_base_chs=None,
    method forward (line 50) | def forward(self, x):
  class ConvBnAct (line 59) | class ConvBnAct(nn.Module):
    method __init__ (line 60) | def __init__(self, in_chs, out_chs, kernel_size,
    method forward (line 67) | def forward(self, x):
  class GhostModule (line 74) | class GhostModule(nn.Module):
    method __init__ (line 75) | def __init__(self, inp, oup, kernel_size=1, ratio=2, dw_size=3, stride...
    method forward (line 93) | def forward(self, x):
  class GhostBottleneck (line 100) | class GhostBottleneck(nn.Module):
    method __init__ (line 103) | def __init__(self, in_chs, mid_chs, out_chs, dw_kernel_size=3,
    method forward (line 139) | def forward(self, x):
  class GhostNet (line 161) | class GhostNet(nn.Module):
    method __init__ (line 162) | def __init__(self, cfgs, num_classes=1000, width=1.0, dropout=0.2):
    method forward (line 201) | def forward(self, x):
  function ghostnet (line 216) | def ghostnet(**kwargs):
  function setup_seed (line 247) | def setup_seed(seed):
  function export_weight (line 254) | def export_weight(model):
  function eval_model (line 274) | def eval_model(input, model):

FILE: ghostnet/ghostnetv1/ghostnetv1.cpp
  function loadWeights (line 38) | std::map<std::string, Weights> loadWeights(const std::string file) {
  function _make_divisible (line 80) | int _make_divisible(int v, int divisor, int min_value = -1) {
  function ILayer (line 94) | ILayer* hardSigmoid(INetworkDefinition* network, ITensor& input) {
  function IScaleLayer (line 101) | IScaleLayer* addBatchNorm2d(INetworkDefinition* network, std::map<std::s...
  function IActivationLayer (line 135) | IActivationLayer* convBnReluStem(INetworkDefinition* network, std::map<s...
  function ILayer (line 153) | ILayer* convBnAct(INetworkDefinition* network, std::map<std::string, Wei...
  function ILayer (line 170) | ILayer* squeezeExcite(INetworkDefinition* network, ITensor& input, std::...
  function ILayer (line 204) | ILayer* ghostModule(INetworkDefinition* network, ITensor& input, std::ma...
  function ILayer (line 258) | ILayer* ghostBottleneck(INetworkDefinition* network, ITensor& input, std...
  function ICudaEngine (line 311) | ICudaEngine* createEngine(IBuilder* builder, IBuilderConfig* config, Dat...
  function APIToModel (line 393) | void APIToModel(IHostMemory** modelStream) {
  function doInference (line 411) | void doInference(IExecutionContext& context, float* input, float* output...
  function main (line 442) | int main(int argc, char** argv) {

FILE: ghostnet/ghostnetv1/logging.h
  function class (line 31) | class LogStreamConsumerBuffer : public std::stringbuf {
  function class (line 90) | class LogStreamConsumerBase {
  function std (line 136) | static std::string severityPrefix(Severity severity) {
  function TestResult (line 190) | enum class TestResult {
  function LogStreamConsumer (line 404) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) {
  function LogStreamConsumer (line 415) | inline LogStreamConsumer LOG_INFO(const Logger& logger) {
  function LogStreamConsumer (line 426) | inline LogStreamConsumer LOG_WARN(const Logger& logger) {
  function LogStreamConsumer (line 437) | inline LogStreamConsumer LOG_ERROR(const Logger& logger) {
  function LogStreamConsumer (line 449) | inline LogStreamConsumer LOG_FATAL(const Logger& logger) {

FILE: ghostnet/ghostnetv2/gen_wts.py
  function _make_divisible (line 13) | def _make_divisible(v, divisor, min_value=None):
  function hard_sigmoid (line 29) | def hard_sigmoid(x, inplace: bool = False):
  class SqueezeExcite (line 36) | class SqueezeExcite(nn.Module):
    method __init__ (line 37) | def __init__(self, in_chs, se_ratio=0.25, reduced_base_chs=None,
    method forward (line 47) | def forward(self, x):
  class ConvBnAct (line 56) | class ConvBnAct(nn.Module):
    method __init__ (line 57) | def __init__(self, in_chs, out_chs, kernel_size,
    method forward (line 64) | def forward(self, x):
  class GhostModuleV2 (line 71) | class GhostModuleV2(nn.Module):
    method __init__ (line 72) | def __init__(self, inp, oup, kernel_size=1, ratio=2, dw_size=3, stride...
    method forward (line 114) | def forward(self, x):
  class GhostBottleneckV2 (line 129) | class GhostBottleneckV2(nn.Module):
    method __init__ (line 131) | def __init__(self, in_chs, mid_chs, out_chs, dw_kernel_size=3,
    method forward (line 169) | def forward(self, x):
  class GhostNetV2 (line 182) | class GhostNetV2(nn.Module):
    method __init__ (line 183) | def __init__(self, cfgs, num_classes=1000, width=1.0, dropout=0.2, blo...
    method forward (line 222) | def forward(self, x):
  function ghostnetv2 (line 238) | def ghostnetv2(**kwargs):
  function setup_seed (line 264) | def setup_seed(seed):
  function export_weight (line 271) | def export_weight(model):
  function eval_model (line 291) | def eval_model(input, model):

FILE: ghostnet/ghostnetv2/ghostnetv2.cpp
  function loadWeights (line 36) | std::map<std::string, Weights> loadWeights(const std::string file) {
  function _make_divisible (line 78) | int _make_divisible(int v, int divisor, int min_value = -1) {
  function ILayer (line 95) | ILayer* hardSigmoid(INetworkDefinition* network, ITensor& input) {
  function IScaleLayer (line 103) | IScaleLayer* addBatchNorm2d(INetworkDefinition* network, std::map<std::s...
  function IActivationLayer (line 137) | IActivationLayer* convBnReluStem(INetworkDefinition* network, std::map<s...
  function ILayer (line 158) | ILayer* convBnAct(INetworkDefinition* network, std::map<std::string, Wei...
  function ILayer (line 178) | ILayer* squeezeExcite(INetworkDefinition* network, ITensor& input, std::...
  function ILayer (line 211) | ILayer* ghostModuleV2(INetworkDefinition* network, ITensor& input, std::...
  function ILayer (line 321) | ILayer* ghostBottleneck(INetworkDefinition* network, ITensor& input, std...
  function ICudaEngine (line 379) | ICudaEngine* createEngine(IBuilder* builder, IBuilderConfig* config, Dat...
  function APIToModel (line 464) | void APIToModel(IHostMemory** modelStream) {
  function doInference (line 482) | void doInference(IExecutionContext& context, float* input, float* output...
  function main (line 513) | int main(int argc, char** argv) {

FILE: ghostnet/ghostnetv2/logging.h
  function class (line 31) | class LogStreamConsumerBuffer : public std::stringbuf {
  function class (line 90) | class LogStreamConsumerBase {
  function std (line 136) | static std::string severityPrefix(Severity severity) {
  function TestResult (line 190) | enum class TestResult {
  function LogStreamConsumer (line 404) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) {
  function LogStreamConsumer (line 415) | inline LogStreamConsumer LOG_INFO(const Logger& logger) {
  function LogStreamConsumer (line 426) | inline LogStreamConsumer LOG_WARN(const Logger& logger) {
  function LogStreamConsumer (line 437) | inline LogStreamConsumer LOG_ERROR(const Logger& logger) {
  function LogStreamConsumer (line 449) | inline LogStreamConsumer LOG_FATAL(const Logger& logger) {

FILE: googlenet/gen_wts.py
  function read_imagenet_labels (line 9) | def read_imagenet_labels() -> dict[int, str]:
  function preprocess (line 24) | def preprocess(img: np.array) -> torch.Tensor:
  function main (line 43) | def main():

FILE: googlenet/googlenet.cpp
  function addBatchNorm2d (line 30) | auto addBatchNorm2d(INetworkDefinition* network, WeightMap& m, ITensor& ...
  function ILayer (line 73) | ILayer* basicConv2d(INetworkDefinition* network, WeightMap& weightMap, I...
  function IConcatenationLayer (line 104) | IConcatenationLayer* inception(INetworkDefinition* network, WeightMap& w...
  function ICudaEngine (line 128) | ICudaEngine* createEngine(int32_t N, IRuntime* runtime, IBuilder* builde...
  function APIToModel (line 225) | void APIToModel(int32_t N, IRuntime* runtime, IHostMemory** modelStream) {
  function doInference (line 248) | std::vector<std::vector<float>> doInference(IExecutionContext& context, ...
  function main (line 306) | int main(int argc, char** argv) {

FILE: googlenet/logging.h
  function class (line 34) | class LogStreamConsumerBuffer : public std::stringbuf {
  function class (line 94) | class LogStreamConsumerBase {
  function std (line 140) | static std::string severityPrefix(Severity severity) {
  type TestInfo (line 188) | struct TestInfo
  function TestResult (line 197) | enum class TestResult : std::uint8_t {
  function LogStreamConsumer (line 460) | inline LogStreamConsumer LOG_FATAL(const Logger& logger) {

FILE: googlenet/utils.h
  function cudaDeviceProp (line 30) | cudaDeviceProp prop{}
  function std (line 92) | static std::vector<float> preprocess_img(cv::Mat& img, bool bgr2rgb, con...
  type ScaleParams (line 167) | struct ScaleParams {
  function Weights (line 178) | const Weights scale{DataType::kFLOAT, params->scale.data(), 3ll};
  function getSize (line 235) | static size_t getSize(DataType dt) {

FILE: hrnet/hrnet-image-classification/common.hpp
  function read_files_in_dir (line 26) | int read_files_in_dir(const char *p_dir_name, std::vector<std::string> &...
  function loadWeights (line 50) | std::map<std::string, Weights> loadWeights(const std::string file) {
  function IScaleLayer (line 88) | IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::s...
  function ILayer (line 122) | ILayer* convBnLeaky(INetworkDefinition *network, std::map<std::string, W...
  function IActivationLayer (line 142) | IActivationLayer* ResBlock2Conv(INetworkDefinition *network, std::map<st...
  function IActivationLayer (line 187) | IActivationLayer* ResBlock(INetworkDefinition *network, std::map<std::st...
  function IActivationLayer (line 224) | IActivationLayer* liteResBlock(INetworkDefinition *network, std::map<std...
  function ILayer (line 252) | ILayer* netAddUpsample(INetworkDefinition* network, ITensor* input, int ...

FILE: hrnet/hrnet-image-classification/demo.py
  function parse_args (line 42) | def parse_args():
  function main (line 74) | def main():

FILE: hrnet/hrnet-image-classification/hrnet.cpp
  function ICudaEngine (line 21) | ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, ...
  function APIToModel (line 604) | void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream) {
  function doInference (line 621) | void doInference(IExecutionContext& context, float* input, float* output...
  function main (line 654) | int main(int argc, char** argv) {

FILE: hrnet/hrnet-image-classification/logging.h
  function class (line 31) | class LogStreamConsumerBuffer : public std::stringbuf
  function class (line 106) | class LogStreamConsumerBase
  function std (line 160) | static std::string severityPrefix(Severity severity)
  function TestResult (line 213) | enum class TestResult
  function LogStreamConsumer (line 447) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
  function LogStreamConsumer (line 459) | inline LogStreamConsumer LOG_INFO(const Logger& logger)
  function LogStreamConsumer (line 471) | inline LogStreamConsumer LOG_WARN(const Logger& logger)
  function LogStreamConsumer (line 483) | inline LogStreamConsumer LOG_ERROR(const Logger& logger)
  function LogStreamConsumer (line 496) | inline LogStreamConsumer LOG_FATAL(const Logger& logger)

FILE: hrnet/hrnet-semantic-segmentation/common.hpp
  function read_files_in_dir (line 26) | int read_files_in_dir(const char *p_dir_name, std::vector<std::string> &...
  function debug_print (line 51) | void debug_print(ITensor *input_tensor, std::string head)
  function loadWeights (line 61) | std::map<std::string, Weights> loadWeights(const std::string file)
  function createLTU (line 100) | cv::Mat createLTU(int len)
  function ITensor (line 110) | ITensor *MeanStd(INetworkDefinition *network, ITensor *input, float *mea...
  function IScaleLayer (line 142) | IScaleLayer *addBatchNorm2d(INetworkDefinition *network, std::map<std::s...
  function ILayer (line 180) | ILayer *convBnRelu(INetworkDefinition *network,
  function IActivationLayer (line 212) | IActivationLayer *ResBlock2Conv(INetworkDefinition *network, std::map<st...
  function IActivationLayer (line 261) | IActivationLayer *ResBlock(INetworkDefinition *network, std::map<std::st...
  function IActivationLayer (line 299) | IActivationLayer *liteResBlock(INetworkDefinition *network, std::map<std...
  function ILayer (line 328) | ILayer *convBnAddRelu(INetworkDefinition *network, std::map<std::string,...
  function ILayer (line 351) | ILayer *netAddUpsampleBi(INetworkDefinition *network, ITensor *input, Di...
  function IElementWiseLayer (line 361) | IElementWiseLayer *convBnUpAdd(INetworkDefinition *network,

FILE: hrnet/hrnet-semantic-segmentation/gen_wts.py
  function parse_args (line 10) | def parse_args():
  function main (line 30) | def main():

FILE: hrnet/hrnet-semantic-segmentation/hrnet.cpp
  function ICudaEngine (line 23) | ICudaEngine *createEngine(unsigned int maxBatchSize, IBuilder *builder, ...
  function APIToModel (line 389) | void APIToModel(unsigned int maxBatchSize, IHostMemory **modelStream, st...
  function parse_args (line 400) | bool parse_args(int argc, char **argv, std::string &wts, std::string &en...
  function doInference (line 419) | void doInference(IExecutionContext &context, cudaStream_t &stream, void ...
  function main (line 426) | int main(int argc, char **argv)

FILE: hrnet/hrnet-semantic-segmentation/hrnet_ocr.cpp
  function ICudaEngine (line 23) | ICudaEngine *createEngine(unsigned int maxBatchSize, IBuilder *builder, ...
  function APIToModel (line 510) | void APIToModel(unsigned int maxBatchSize, IHostMemory **modelStream, st...
  function parse_args (line 521) | bool parse_args(int argc, char **argv, std::string &wts, std::string &en...
  function doInference (line 540) | void doInference(IExecutionContext &context, cudaStream_t &stream, void ...
  function main (line 547) | int main(int argc, char **argv)

FILE: hrnet/hrnet-semantic-segmentation/hrnet_trt.py
  function get_img_path_batches (line 17) | def get_img_path_batches(batch_size, img_dir):
  class Hrnet_TRT (line 30) | class Hrnet_TRT(object):
    method __init__ (line 35) | def __init__(self, engine_file_path):
    method infer (line 82) | def infer(self, image_raw):
    method destroy (line 122) | def destroy(self):
    method preprocess_image (line 126) | def preprocess_image(self, image_raw):
    method get_raw_image (line 145) | def get_raw_image(self, image_path_batch):
    method get_raw_image_zeros (line 152) | def get_raw_image_zeros(self, image_path_batch=None):
  class inferThread (line 160) | class inferThread(threading.Thread):
    method __init__ (line 161) | def __init__(self, hrnet_wrapper, image_path_batch):
    method run (line 166) | def run(self):
  class warmUpThread (line 176) | class warmUpThread(threading.Thread):
    method __init__ (line 177) | def __init__(self, hrnet_wrapper):
    method run (line 181) | def run(self):

FILE: hrnet/hrnet-semantic-segmentation/logging.h
  function class (line 31) | class LogStreamConsumerBuffer : public std::stringbuf
  function class (line 106) | class LogStreamConsumerBase
  function std (line 160) | static std::string severityPrefix(Severity severity)
  function TestResult (line 213) | enum class TestResult
  function LogStreamConsumer (line 447) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
  function LogStreamConsumer (line 459) | inline LogStreamConsumer LOG_INFO(const Logger& logger)
  function LogStreamConsumer (line 471) | inline LogStreamConsumer LOG_WARN(const Logger& logger)
  function LogStreamConsumer (line 483) | inline LogStreamConsumer LOG_ERROR(const Logger& logger)
  function LogStreamConsumer (line 496) | inline LogStreamConsumer LOG_FATAL(const Logger& logger)

FILE: ibnnet/InferenceEngine.cpp
  type trt (line 3) | namespace trt {

FILE: ibnnet/InferenceEngine.h
  type EngineConfig (line 23) | struct EngineConfig {
  function class (line 35) | class InferenceEngine {

FILE: ibnnet/holder.h
  function T (line 30) | T* get() { return holder; }
  function explicit (line 31) | explicit operator bool() { return holder != nullptr; }

FILE: ibnnet/ibnnet.cpp
  type trt (line 5) | namespace trt {
    function ICudaEngine (line 23) | ICudaEngine *IBNNet::createEngine(IBuilder* builder, IBuilderConfig* c...

FILE: ibnnet/ibnnet.h
  type IBN (line 16) | enum IBN {
  function DataType (line 42) | DataType _dt{DataType::kFLOAT};

FILE: ibnnet/layers.cpp
  type trtxapi (line 3) | namespace trtxapi {
    function ITensor (line 5) | ITensor* MeanStd(INetworkDefinition *network, std::map<std::string, We...
    function IScaleLayer (line 30) | IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std:...
    function IScaleLayer (line 63) | IScaleLayer* addInstanceNorm2d(INetworkDefinition *network, std::map<s...
    function IConcatenationLayer (line 131) | IConcatenationLayer* addIBN(INetworkDefinition *network, std::map<std:...
    function IActivationLayer (line 154) | IActivationLayer* bottleneck_ibn(INetworkDefinition *network, std::map...

FILE: ibnnet/layers.h
  function namespace (line 10) | namespace trtxapi {

FILE: ibnnet/logging.h
  function class (line 31) | class LogStreamConsumerBuffer : public std::stringbuf
  function class (line 106) | class LogStreamConsumerBase
  function std (line 160) | static std::string severityPrefix(Severity severity)
  function TestResult (line 213) | enum class TestResult
  function LogStreamConsumer (line 447) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
  function LogStreamConsumer (line 459) | inline LogStreamConsumer LOG_INFO(const Logger& logger)
  function LogStreamConsumer (line 471) | inline LogStreamConsumer LOG_WARN(const Logger& logger)
  function LogStreamConsumer (line 483) | inline LogStreamConsumer LOG_ERROR(const Logger& logger)
  function LogStreamConsumer (line 496) | inline LogStreamConsumer LOG_FATAL(const Logger& logger)

FILE: ibnnet/main.cpp
  function run_infer (line 17) | void run_infer(std::shared_ptr<trt::IBNNet> model) {
  function main (line 50) | int main(int argc, char** argv) {

FILE: ibnnet/utils.cpp
  function loadWeights (line 6) | std::map<std::string, Weights> loadWeights(const std::string file) {

FILE: inception/inceptionv3/inception_v3.cpp
  function loadWeights (line 38) | std::map<std::string, Weights> loadWeights(const std::string file)
  function IScaleLayer (line 77) | IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::s...
  function IActivationLayer (line 111) | IActivationLayer* basicConv2d(INetworkDefinition *network, std::map<std:...
  function IConcatenationLayer (line 126) | IConcatenationLayer* inceptionA(INetworkDefinition *network, std::map<st...
  function IConcatenationLayer (line 150) | IConcatenationLayer* inceptionB(INetworkDefinition *network, std::map<st...
  function IConcatenationLayer (line 167) | IConcatenationLayer* inceptionC(INetworkDefinition *network, std::map<st...
  function IConcatenationLayer (line 194) | IConcatenationLayer* inceptionD(INetworkDefinition *network, std::map<st...
  function IConcatenationLayer (line 213) | IConcatenationLayer* inceptionE(INetworkDefinition *network, std::map<st...
  function ICudaEngine (line 245) | ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, ...
  function APIToModel (line 316) | void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream)
  function doInference (line 335) | void doInference(IExecutionContext& context, float* input, float* output...
  function main (line 369) | int main(int argc, char** argv)

FILE: inception/inceptionv3/logging.h
  function class (line 31) | class LogStreamConsumerBuffer : public std::stringbuf
  function class (line 106) | class LogStreamConsumerBase
  function std (line 160) | static std::string severityPrefix(Severity severity)
  function TestResult (line 213) | enum class TestResult
  function LogStreamConsumer (line 447) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
  function LogStreamConsumer (line 459) | inline LogStreamConsumer LOG_INFO(const Logger& logger)
  function LogStreamConsumer (line 471) | inline LogStreamConsumer LOG_WARN(const Logger& logger)
  function LogStreamConsumer (line 483) | inline LogStreamConsumer LOG_ERROR(const Logger& logger)
  function LogStreamConsumer (line 496) | inline LogStreamConsumer LOG_FATAL(const Logger& logger)

FILE: inception/inceptionv4/inception_v4.cpp
  type trtx (line 4) | namespace trtx {

FILE: inception/inceptionv4/inception_v4.h
  type InceptionV4Params (line 19) | struct InceptionV4Params
  function fp16 (line 24) | bool fp16{false};                  // Allow running the network in FP16 ...
  function class (line 35) | class InceptionV4 {

FILE: inception/inceptionv4/layers_api.cpp
  type trtxlayers (line 3) | namespace trtxlayers {
    function IScaleLayer (line 4) | IScaleLayer* addBatchNorm2d(
    function IActivationLayer (line 45) | IActivationLayer* basicConv2d(
    function IConcatenationLayer (line 72) | IConcatenationLayer* mixed_3a(
    function IConcatenationLayer (line 94) | IConcatenationLayer* mixed_4a(
    function IConcatenationLayer (line 118) | IConcatenationLayer* mixed_5a(
    function IConcatenationLayer (line 142) | IConcatenationLayer* inceptionA(
    function IConcatenationLayer (line 177) | IConcatenationLayer* reductionA(
    function IConcatenationLayer (line 204) | IConcatenationLayer* inceptionB(
    function IConcatenationLayer (line 242) | IConcatenationLayer* reductionB(
    function IConcatenationLayer (line 272) | IConcatenationLayer* inceptionC(

FILE: inception/inceptionv4/layers_api.h
  function namespace (line 14) | namespace trtxlayers {

FILE: inception/inceptionv4/logging.h
  function class (line 31) | class LogStreamConsumerBuffer : public std::stringbuf
  function class (line 108) | class LogStreamConsumerBase
  function std (line 162) | static std::string severityPrefix(Severity severity)
  function TestResult (line 215) | enum class TestResult
  function LogStreamConsumer (line 451) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
  function LogStreamConsumer (line 463) | inline LogStreamConsumer LOG_INFO(const Logger& logger)
  function LogStreamConsumer (line 475) | inline LogStreamConsumer LOG_WARN(const Logger& logger)
  function LogStreamConsumer (line 487) | inline LogStreamConsumer LOG_ERROR(const Logger& logger)
  function LogStreamConsumer (line 500) | inline LogStreamConsumer LOG_FATAL(const Logger& logger)

FILE: inception/inceptionv4/main.cpp
  function initializeParams (line 8) | trtx::InceptionV4Params initializeParams()
  function main (line 28) | int main(int argc, char** argv){

FILE: inception/inceptionv4/utils.cpp
  function loadWeights (line 7) | std::map<std::string, Weights> loadWeights(const std::string file) {

FILE: lenet/gen_wts.py
  class LeNet (line 10) | class LeNet(nn.Module):
    method __init__ (line 11) | def __init__(self):
    method forward (line 27) | def forward(self, x):
  function reformat_state_dict (line 48) | def reformat_state_dict(state: OrderedDict) -> OrderedDict:
  function main (line 66) | def main():

FILE: lenet/lenet.cpp
  function ICudaEngine (line 36) | ICudaEngine* createLenetEngine(int32_t N, IRuntime* runtime, IBuilder* b...
  function APIToModel (line 164) | void APIToModel(int32_t N, IRuntime* runtime, IHostMemory** modelStream) {
  function doInference (line 187) | std::vector<std::vector<float>> doInference(IExecutionContext& context, ...
  function main (line 245) | int main(int argc, char** argv) {

FILE: lenet/lenet.py
  function load_weights (line 23) | def load_weights(file):
  function createLenetEngine (line 47) | def createLenetEngine(maxBatchSize, builder, config, dt):
  function APIToModel (line 131) | def APIToModel(maxBatchSize):
  function doInference (line 143) | def doInference(context, host_in, host_out, batchSize):

FILE: lenet/lenet_tripy.py
  function load_weights (line 12) | def load_weights(file):
  class Lenet5 (line 28) | class Lenet5(tp.Module):
    method __init__ (line 29) | def __init__(self):
    method forward (line 38) | def forward(self, x):
  function main (line 52) | def main():

FILE: lenet/logging.h
  function class (line 34) | class LogStreamConsumerBuffer : public std::stringbuf {
  function class (line 94) | class LogStreamConsumerBase {
  function std (line 140) | static std::string severityPrefix(Severity severity) {
  type TestInfo (line 188) | struct TestInfo
  function TestResult (line 197) | enum class TestResult : std::uint8_t {
  function LogStreamConsumer (line 460) | inline LogStreamConsumer LOG_FATAL(const Logger& logger) {

FILE: lenet/utils.h
  type std (line 18) | enum : std::uint32_t { WORKSPACE_SIZE = 16 << 20 }
  function cudaDeviceProp (line 32) | cudaDeviceProp prop{}
  function getSize (line 99) | static size_t getSize(DataType dt) {

FILE: lprnet/gen_wts.py
  function preprocess (line 20) | def preprocess(path):
  class small_basic_block (line 30) | class small_basic_block(nn.Module):
    method __init__ (line 31) | def __init__(self, ch_in, ch_out):
    method forward (line 43) | def forward(self, x):
  class LPRNet (line 47) | class LPRNet(nn.Module):
    method __init__ (line 48) | def __init__(self, class_num, dropout_rate):
    method forward (line 82) | def forward(self, x):

FILE: lprnet/logging.h
  function class (line 34) | class LogStreamConsumerBuffer : public std::stringbuf {
  function class (line 94) | class LogStreamConsumerBase {
  function std (line 140) | static std::string severityPrefix(Severity severity) {
  type TestInfo (line 188) | struct TestInfo
  function TestResult (line 197) | enum class TestResult : std::uint8_t {
  function LogStreamConsumer (line 460) | inline LogStreamConsumer LOG_FATAL(const Logger& logger) {

FILE: lprnet/lprnet.cpp
  function IScaleLayer (line 46) | IScaleLayer* addBatchNorm2d(INetworkDefinition* network, WeightMap& weig...
  function IConvolutionLayer (line 81) | IConvolutionLayer* smallBasicBlock(INetworkDefinition* network, WeightMa...
  function ICudaEngine (line 113) | ICudaEngine* createEngine(int32_t N, IRuntime* runtime, IBuilder* builde...
  function APIToModel (line 274) | void APIToModel(int32_t N, IRuntime* runtime, IHostMemory** modelStream) {
  function doInference (line 294) | auto doInference(IExecutionContext& context, void* input, int64_t batchS...
  function main (line 352) | int main(int argc, char** argv) {

FILE: lprnet/utils.h
  function cudaDeviceProp (line 28) | cudaDeviceProp prop{}
  function std (line 90) | static inline std::vector<float> preprocess_img(cv::Mat& img, bool bgr2r...
  type ScaleParams (line 165) | struct ScaleParams {
  function Weights (line 176) | const Weights scale{DataType::kFLOAT, params->scale.data(), 3ll};
  function getSize (line 233) | static inline size_t getSize(DataType dt) {

FILE: mlp/logging.h
  function class (line 34) | class LogStreamConsumerBuffer : public std::stringbuf {
  function class (line 94) | class LogStreamConsumerBase {
  function std (line 140) | static std::string severityPrefix(Severity severity) {
  type TestInfo (line 188) | struct TestInfo
  function TestResult (line 197) | enum class TestResult : std::uint8_t {
  function LogStreamConsumer (line 460) | inline LogStreamConsumer LOG_FATAL(const Logger& logger) {

FILE: mlp/mlp.cpp
  function ICudaEngine (line 30) | ICudaEngine* createMLPEngine(int32_t N, IRuntime* runtime, IBuilder* bui...
  function APIToModel (line 84) | void APIToModel(int32_t maxBatchSize, IRuntime* runtime, IHostMemory** m...
  function doInference (line 114) | void doInference(IExecutionContext& ctx, void* input, float* output, int...
  function main (line 177) | int main(int argc, char** argv) {

FILE: mlp/mlp.py
  function load_weights (line 31) | def load_weights(file_path):
  function create_mlp_engine (line 69) | def create_mlp_engine(max_batch_size, builder, config, dt):
  function api_to_model (line 116) | def api_to_model(max_batch_size):
  function perform_inference (line 146) | def perform_inference(input_val):
  function get_args (line 220) | def get_args():

FILE: mlp/utils.h
  function cudaDeviceProp (line 27) | cudaDeviceProp prop{}
  function getSize (line 77) | static size_t getSize(DataType dt) {

FILE: mnasnet/gen_wts.py
  function read_imagenet_labels (line 12) | def read_imagenet_labels() -> dict[int, str]:
  function preprocess (line 27) | def preprocess(img: np.array) -> torch.Tensor:
  function main (line 46) | def main():

FILE: mnasnet/logging.h
  function class (line 34) | class LogStreamConsumerBuffer : public std::stringbuf {
  function class (line 94) | class LogStreamConsumerBase {
  function std (line 140) | static std::string severityPrefix(Severity severity) {
  type TestInfo (line 188) | struct TestInfo
  function TestResult (line 197) | enum class TestResult : std::uint8_t {
  function LogStreamConsumer (line 460) | inline LogStreamConsumer LOG_FATAL(const Logger& logger) {

FILE: mnasnet/mnasnet.cpp
  type ConvParams (line 36) | struct ConvParams {
  type InvertedResParams (line 46) | struct InvertedResParams {
  function ILayer (line 54) | ILayer* addBatchNorm2d(INetworkDefinition* network, WeightMap& weightMap...
  function ILayer (line 89) | ILayer* CBR(INetworkDefinition* net, WeightMap& map, const std::string& ...
  function ILayer (line 124) | ILayer* invertedRes(INetworkDefinition* network, WeightMap& w, ITensor& ...
  function ICudaEngine (line 157) | ICudaEngine* createEngine(unsigned int maxBatchSize, IRuntime* runtime, ...
  function APIToModel (line 240) | void APIToModel(unsigned int maxBatchSize, IRuntime* runtime, IHostMemor...
  function do_inference (line 264) | std::vector<std::vector<float>> do_inference(IExecutionContext& context,...
  function main (line 322) | int main(int argc, char** argv) {

FILE: mnasnet/utils.h
  function cudaDeviceProp (line 30) | cudaDeviceProp prop{}
  function std (line 92) | static std::vector<float> preprocess_img(cv::Mat& img, bool bgr2rgb, con...
  function loadImagenetLabelMap (line 139) | int, std::string> loadImagenetLabelMap(const std::string& path) {
  type ScaleParams (line 167) | struct ScaleParams {
  function Weights (line 178) | const Weights scale{DataType::kFLOAT, params->scale.data(), 3ll};
  function getSize (line 235) | static size_t getSize(DataType dt) {

FILE: mobilenet/mobilenetv2/logging.h
  function class (line 31) | class LogStreamConsumerBuffer : public std::stringbuf {
  function class (line 90) | class LogStreamConsumerBase {
  function std (line 136) | static std::string severityPrefix(Severity severity) {
  function TestResult (line 190) | enum class TestResult {
  function LogStreamConsumer (line 404) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) {
  function LogStreamConsumer (line 415) | inline LogStreamConsumer LOG_INFO(const Logger& logger) {
  function LogStreamConsumer (line 426) | inline LogStreamConsumer LOG_WARN(const Logger& logger) {
  function LogStreamConsumer (line 437) | inline LogStreamConsumer LOG_ERROR(const Logger& logger) {
  function LogStreamConsumer (line 449) | inline LogStreamConsumer LOG_FATAL(const Logger& logger) {

FILE: mobilenet/mobilenetv2/mobilenet_v2.cpp
  function loadWeights (line 36) | std::map<std::string, Weights> loadWeights(const std::string file) {
  function IScaleLayer (line 72) | IScaleLayer* addBatchNorm2d(INetworkDefinition* network, std::map<std::s...
  function IElementWiseLayer (line 107) | IElementWiseLayer* convBnRelu(INetworkDefinition* network, std::map<std:...
  function ILayer (line 147) | ILayer* invertedRes(INetworkDefinition* network, std::map<std::string, W...
  function ICudaEngine (line 177) | ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, ...
  function APIToModel (line 235) | void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream) {
  function doInference (line 253) | void doInference(IExecutionContext& context, float* input, float* output...
  function main (line 288) | int main(int argc, char** argv) {

FILE: mobilenet/mobilenetv2/mobilenet_v2.py
  function load_weights (line 25) | def load_weights(file):
  function add_batch_norm_2d (line 49) | def add_batch_norm_2d(network, weight_map, input, layer_name, eps):
  function conv_bn_relu (line 64) | def conv_bn_relu(network, weight_map, input, outch, ksize, s, g, lname):
  function inverted_res (line 102) | def inverted_res(network, weight_map, input, lname, inch, outch, s, exp):
  function create_engine (line 135) | def create_engine(max_batch_size, builder, config, dt):
  function API_to_model (line 187) | def API_to_model(max_batch_size):
  class HostDeviceMem (line 200) | class HostDeviceMem(object):
    method __init__ (line 201) | def __init__(self, host_mem, device_mem):
    method __str__ (line 205) | def __str__(self):
    method __repr__ (line 208) | def __repr__(self):
  function allocate_buffers (line 212) | def allocate_buffers(engine):
  function do_inference (line 233) | def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):

FILE: mobilenet/mobilenetv3/logging.h
  function class (line 31) | class LogStreamConsumerBuffer : public std::stringbuf {
  function class (line 90) | class LogStreamConsumerBase {
  function std (line 136) | static std::string severityPrefix(Severity severity) {
  function TestResult (line 190) | enum class TestResult {
  function LogStreamConsumer (line 404) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) {
  function LogStreamConsumer (line 415) | inline LogStreamConsumer LOG_INFO(const Logger& logger) {
  function LogStreamConsumer (line 426) | inline LogStreamConsumer LOG_WARN(const Logger& logger) {
  function LogStreamConsumer (line 437) | inline LogStreamConsumer LOG_ERROR(const Logger& logger) {
  function LogStreamConsumer (line 449) | inline LogStreamConsumer LOG_FATAL(const Logger& logger) {

FILE: mobilenet/mobilenetv3/mobilenet_v3.cpp
  function loadWeights (line 37) | std::map<std::string, Weights> loadWeights(const std::string file) {
  function IScaleLayer (line 73) | IScaleLayer* addBatchNorm(INetworkDefinition* network, std::map<std::str...
  function ILayer (line 108) | ILayer* hSwish(INetworkDefinition* network, ITensor& input, std::string ...
  function ILayer (line 118) | ILayer* convBnHswish(INetworkDefinition* network, std::map<std::string, ...
  function ILayer (line 135) | ILayer* seLayer(INetworkDefinition* network, std::map<std::string, Weigh...
  function ILayer (line 157) | ILayer* convSeq1(INetworkDefinition* network, std::map<std::string, Weig...
  function ILayer (line 191) | ILayer* convSeq2(INetworkDefinition* network, std::map<std::string, Weig...
  function ILayer (line 235) | ILayer* invertedRes(INetworkDefinition* network, std::map<std::string, W...
  function ICudaEngine (line 253) | ICudaEngine* createEngineSmall(unsigned int maxBatchSize, IBuilder* buil...
  function ICudaEngine (line 314) | ICudaEngine* createEngineLarge(unsigned int maxBatchSize, IBuilder* buil...
  function APIToModel (line 375) | void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream, st...
  function doInference (line 400) | void doInference(IExecutionContext& context, float* input, float* output...
  function main (line 435) | int main(int argc, char** argv) {

FILE: mobilenet/mobilenetv3/mobilenet_v3.py
  function load_weights (line 26) | def load_weights(file):
  function add_batch_norm_2d (line 50) | def add_batch_norm_2d(network, weight_map, input, layer_name, eps):
  function add_h_swish (line 65) | def add_h_swish(network, input):
  function conv_bn_h_swish (line 76) | def conv_bn_h_swish(network, weight_map, input, outch, ksize, s, g, lname):
  function add_se_layer (line 96) | def add_se_layer(network, weight_map, input, c, w, lname):
  function conv_seq_1 (line 119) | def conv_seq_1(network, weight_map, input, output, hdim, k, s, use_se, u...
  function conv_seq_2 (line 157) | def conv_seq_2(network, weight_map, input, output, hdim, k, s, use_se, u...
  function inverted_res (line 207) | def inverted_res(network, weight_map, input, lname, inch, outch, s, hidd...
  function create_engine_small (line 224) | def create_engine_small(max_batch_size, builder, config, dt):
  function create_engine_large (line 282) | def create_engine_large(max_batch_size, builder, config, dt):
  function API_to_model (line 340) | def API_to_model(max_batch_size, model_type):
  class HostDeviceMem (line 358) | class HostDeviceMem(object):
    method __init__ (line 359) | def __init__(self, host_mem, device_mem):
    method __str__ (line 363) | def __str__(self):
    method __repr__ (line 366) | def __repr__(self):
  function allocate_buffers (line 370) | def allocate_buffers(engine):
  function do_inference (line 391) | def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):

FILE: psenet/layers.cpp
  function IScaleLayer (line 3) | IScaleLayer* addBatchNorm2d(INetworkDefinition* network, std::map<std::s...
  function IActivationLayer (line 37) | IActivationLayer* bottleneck(INetworkDefinition* network, std::map<std::...
  function IActivationLayer (line 97) | IActivationLayer* addConvRelu(INetworkDefinition* network, std::map<std:...

FILE: psenet/main.cpp
  function main (line 3) | int main(int argc, char** argv)

FILE: psenet/psenet.cpp
  function ICudaEngine (line 21) | ICudaEngine* PSENet::createEngine(IBuilder* builder, IBuilderConfig* con...

FILE: psenet/psenet.h
  function class (line 9) | class PSENet

FILE: psenet/utils.cpp
  function loadWeights (line 6) | std::map<std::string, Weights> loadWeights(const std::string file)
  function expandBox (line 46) | cv::RotatedRect expandBox(const cv::RotatedRect& inBox, float ratio)
  function drawRects (line 55) | void drawRects(cv::Mat& image, std::vector<cv::RotatedRect> boxes, float...

FILE: psenet/utils.h
  type InferDeleter (line 22) | struct InferDeleter
  function class (line 46) | class Logger : public nvinfer1::ILogger

FILE: rcnn/BatchedNmsPlugin.h
  function namespace (line 15) | namespace nvinfer1 {
  function Dims (line 91) | Dims getOutputDimensions(int index,
  function supportsFormat (line 98) | bool supportsFormat(DataType type, PluginFormat format) const TRT_NOEXCE...
  function initialize (line 102) | int initialize() TRT_NOEXCEPT override { return 0; }
  function terminate (line 104) | void terminate() TRT_NOEXCEPT override {}
  function getWorkspaceSize (line 106) | size_t getWorkspaceSize(int maxBatchSize) const TRT_NOEXCEPT override {
  function enqueue (line 116) | int enqueue(int batchSize,
  function destroy (line 124) | void destroy() TRT_NOEXCEPT override {
  function setPluginNamespace (line 132) | void setPluginNamespace(const char *N) TRT_NOEXCEPT override {
  function DataType (line 136) | DataType getOutputDataType(int index, const DataType* inputTypes, int nb...
  function isOutputBroadcastAcrossBatch (line 141) | bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBr...
  function canBroadcastInputAcrossBatch (line 146) | bool canBroadcastInputAcrossBatch(int inputIndex) const TRT_NOEXCEPT ove...
  function configurePlugin (line 148) | void configurePlugin(const Dims* inputDims, int nbInputs, const Dims* ou...
  function IPluginV2Ext (line 159) | IPluginV2Ext *clone() const TRT_NOEXCEPT override {
  function class (line 175) | class BatchedNmsPluginCreator : public IPluginCreator {

FILE: rcnn/MaskRcnnInferencePlugin.h
  function namespace (line 15) | namespace nvinfer1 {
  function Dims (line 72) | Dims getOutputDimensions(int index,
  function supportsFormat (line 77) | bool supportsFormat(DataType type, PluginFormat format) const TRT_NOEXCE...
  function initialize (line 80) | int initialize() TRT_NOEXCEPT override { return 0; }
  function terminate (line 81) | void terminate() TRT_NOEXCEPT override {}
  function getWorkspaceSize (line 82) | size_t getWorkspaceSize(int maxBatchSize) const TRT_NOEXCEPT override {
  function enqueue (line 85) | int enqueue(int batchSize,
  function destroy (line 91) | void destroy() TRT_NOEXCEPT override {
  function setPluginNamespace (line 97) | void setPluginNamespace(const char *N) TRT_NOEXCEPT override {
  function DataType (line 100) | DataType getOutputDataType(int index, const DataType* inputTypes, int nb...
  function isOutputBroadcastAcrossBatch (line 104) | bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBr...
  function canBroadcastInputAcrossBatch (line 108) | bool canBroadcastInputAcrossBatch(int inputIndex) const TRT_NOEXCEPT ove...
  function configurePlugin (line 109) | void configurePlugin(const Dims* inputDims, int nbInputs, const Dims* ou...
  function IPluginV2Ext (line 121) | IPluginV2Ext *clone() const TRT_NOEXCEPT override {
  function class (line 136) | class MaskRcnnInferencePluginCreator : public IPluginCreator {

FILE: rcnn/PredictorDecodePlugin.h
  function namespace (line 15) | namespace nvinfer1 {
  function Dims (line 103) | Dims getOutputDimensions(int index,
  function supportsFormat (line 110) | bool supportsFormat(DataType type, PluginFormat format) const TRT_NOEXCE...
  function initialize (line 114) | int initialize() TRT_NOEXCEPT override { return 0; }
  function terminate (line 116) | void terminate() TRT_NOEXCEPT override {}
  function getWorkspaceSize (line 118) | size_t getWorkspaceSize(int maxBatchSize) const TRT_NOEXCEPT override {
  function enqueue (line 127) | int enqueue(int batchSize,
  function destroy (line 135) | void destroy() TRT_NOEXCEPT override {
  function setPluginNamespace (line 143) | void setPluginNamespace(const char *N) TRT_NOEXCEPT override {}
  function DataType (line 146) | DataType getOutputDataType(int index, const DataType* inputTypes, int nb...
  function isOutputBroadcastAcrossBatch (line 151) | bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBr...
  function canBroadcastInputAcrossBatch (line 156) | bool canBroadcastInputAcrossBatch(int inputIndex) const TRT_NOEXCEPT ove...
  function configurePlugin (line 158) | void configurePlugin(const Dims* inputDims, int nbInputs, const Dims* ou...
  function IPluginV2Ext (line 176) | IPluginV2Ext *clone() const TRT_NOEXCEPT override {
  function class (line 192) | class PredictorDecodePluginCreator : public IPluginCreator {

FILE: rcnn/RoiAlignPlugin.h
  function namespace (line 15) | namespace nvinfer1 {
  function class (line 172) | class RoiAlignPluginCreator : public IPluginCreator {

FILE: rcnn/RpnDecodePlugin.h
  function namespace (line 15) | namespace nvinfer1 {

FILE: rcnn/RpnNmsPlugin.h
  function namespace (line 15) | namespace nvinfer1 {
  function Dims (line 85) | Dims getOutputDimensions(int index,
  function supportsFormat (line 92) | bool supportsFormat(DataType type, PluginFormat format) const TRT_NOEXCE...
  function initialize (line 96) | int initialize() TRT_NOEXCEPT override { return 0; }
  function terminate (line 98) | void terminate() TRT_NOEXCEPT override {}
  function getWorkspaceSize (line 100) | size_t getWorkspaceSize(int maxBatchSize) const TRT_NOEXCEPT override {
  function enqueue (line 109) | int enqueue(int batchSize,
  function destroy (line 117) | void destroy() TRT_NOEXCEPT override {
  function setPluginNamespace (line 125) | void setPluginNamespace(const char *N) TRT_NOEXCEPT override {
  function DataType (line 129) | DataType getOutputDataType(int index, const DataType* inputTypes, int nb...
  function isOutputBroadcastAcrossBatch (line 134) | bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBr...
  function canBroadcastInputAcrossBatch (line 139) | bool canBroadcastInputAcrossBatch(int inputIndex) const TRT_NOEXCEPT ove...
  function configurePlugin (line 141) | void configurePlugin(const Dims* inputDims, int nbInputs, const Dims* ou...
  function IPluginV2Ext (line 151) | IPluginV2Ext *clone() const TRT_NOEXCEPT override {
  function class (line 167) | class RpnNmsPluginCreator : public IPluginCreator {

FILE: rcnn/backbone.hpp
  type RESNETTYPE (line 11) | enum RESNETTYPE {
  function ILayer (line 27) | ILayer* BasicStem(INetworkDefinition *network,
  function ITensor (line 51) | ITensor* BasicBlock(INetworkDefinition *network,
  function ITensor (line 100) | ITensor* BottleneckBlock(INetworkDefinition *network,
  function ITensor (line 168) | ITensor* MakeStage(INetworkDefinition *network,
  function ITensor (line 195) | ITensor* BuildResNet(INetworkDefinition *network,

FILE: rcnn/calibrator.hpp
  class Int8EntropyCalibrator2 (line 19) | class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {

FILE: rcnn/common.hpp
  function loadWeights (line 24) | void loadWeights(const std::string file, std::map<std::string, Weights>&...
  function read_files_in_dir (line 57) | static inline int read_files_in_dir(const char *p_dir_name, std::vector<...
  function preprocessImg (line 79) | static inline cv::Mat preprocessImg(cv::Mat& img, int input_w, int input...

FILE: rcnn/gen_wts.py
  function fuse_conv_and_bn (line 6) | def fuse_conv_and_bn(conv):
  function fuse_bn (line 30) | def fuse_bn(model):
  function gen_wts (line 37) | def gen_wts(model, filename):

FILE: rcnn/logging.h
  function class (line 32) | class LogStreamConsumerBuffer : public std::stringbuf {
  function class (line 96) | class LogStreamConsumerBase {
  function std (line 140) | static std::string severityPrefix(Severity severity) {
  function TestResult (line 188) | enum class TestResult {
  function LogStreamConsumer (line 397) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) {
  function LogStreamConsumer (line 408) | inline LogStreamConsumer LOG_INFO(const Logger& logger) {
  function LogStreamConsumer (line 419) | inline LogStreamConsumer LOG_WARN(const Logger& logger) {
  function LogStreamConsumer (line 430) | inline LogStreamConsumer LOG_ERROR(const Logger& logger) {
  function LogStreamConsumer (line 442) | inline LogStreamConsumer LOG_FATAL(const Logger& logger) {

FILE: rcnn/rcnn.cpp
  function GenerateAnchors (line 62) | std::vector<float> GenerateAnchors(const std::vector<float>& anchor_sizes,
  function ITensor (line 80) | ITensor* DataPreprocess(INetworkDefinition *network, ITensor& input) {
  function ITensor (line 102) | ITensor* RPN(INetworkDefinition *network,
  function ITensor (line 147) | ITensor* SharedRoiTransform(INetworkDefinition *network, std::map<std::s...
  function BoxHead (line 165) | void BoxHead(INetworkDefinition *network, std::map<std::string, Weights>...
  function MaskHead (line 204) | void MaskHead(INetworkDefinition *network, std::map<std::string, Weights...
  function ROIHeads (line 235) | std::vector<ITensor*> ROIHeads(INetworkDefinition *network, std::map<std...
  function ICudaEngine (line 250) | ICudaEngine* createEngine_rcnn(unsigned int maxBatchSize,
  function BuildRcnnModel (line 310) | void BuildRcnnModel(unsigned int maxBatchSize, IHostMemory** modelStream...
  function doInference (line 328) | void doInference(IExecutionContext& context, cudaStream_t& stream, std::...
  function calculateSize (line 349) | void calculateSize() {
  function parse_args (line 369) | bool parse_args(int argc, char** argv, std::string& wtsFile, std::string...
  function main (line 384) | int main(int argc, char** argv) {

FILE: real-esrgan/general-x4v3/gen_wts.py
  function main (line 11) | def main():

FILE: real-esrgan/general-x4v3/main.cpp
  function loadWeights (line 21) | std::map<std::string, Weights> loadWeights(const std::string file) {
  function build_engine (line 83) | void build_engine(DataType dt, std::string& wts_path) {
  function read_files_in_dir (line 164) | static inline int read_files_in_dir(const char* p_dir_name, std::vector<...
  function doInference (line 185) | void doInference(IExecutionContext& context, cudaStream_t& stream, void*...
  function main (line 193) | int main(int argc, char** argv) {

FILE: real-esrgan/general-x4v3/src/include/logging/logging.h
  function class (line 31) | class LogStreamConsumerBuffer : public std::stringbuf {
  function class (line 90) | class LogStreamConsumerBase {
  function std (line 136) | static std::string severityPrefix(Severity severity) {
  function TestResult (line 190) | enum class TestResult {
  function LogStreamConsumer (line 404) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) {
  function LogStreamConsumer (line 415) | inline LogStreamConsumer LOG_INFO(const Logger& logger) {
  function LogStreamConsumer (line 426) | inline LogStreamConsumer LOG_WARN(const Logger& logger) {
  function LogStreamConsumer (line 437) | inline LogStreamConsumer LOG_ERROR(const Logger& logger) {
  function LogStreamConsumer (line 449) | inline LogStreamConsumer LOG_FATAL(const Logger& logger) {

FILE: real-esrgan/general-x4v3/src/include/pixel_shuffle/pixel_shuffle.hpp
  class PixelShufflePlugin (line 8) | class PixelShufflePlugin : public nvinfer1::IPluginV2DynamicExt {
    method PixelShufflePlugin (line 10) | PixelShufflePlugin(int upscaleFactor) : mUpscaleFactor(upscaleFactor) {}
    method PixelShufflePlugin (line 12) | PixelShufflePlugin(const void* data, size_t length) { memcpy(&mUpscale...
    method getNbOutputs (line 18) | int getNbOutputs() const noexcept override { return 1; }
    method getOutputDimensions (line 34) | nvinfer1::DimsExprs getOutputDimensions(int32_t outputIndex, nvinfer1:...
    method supportsFormatCombination (line 51) | bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDe...
    method getOutputDataType (line 56) | nvinfer1::DataType getOutputDataType(int index, const nvinfer1::DataTy...
    method configurePlugin (line 67) | void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* inputs, ...
    method getWorkspaceSize (line 75) | size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, int ...
    method getSerializationSize (line 80) | size_t getSerializationSize() const noexcept override { return sizeof(...
    method serialize (line 82) | void serialize(void* buffer) const noexcept override { memcpy(buffer, ...
    method destroy (line 84) | void destroy() noexcept override {
    method setPluginNamespace (line 90) | void setPluginNamespace(const char* pluginNamespace) noexcept override...
    method initialize (line 94) | int initialize() noexcept override { return 0; }
    method terminate (line 100) | void terminate() noexcept override {}
  class PixelShufflePluginCreator (line 107) | class PixelShufflePluginCreator : public nvinfer1::IPluginCreator {
    method PixelShufflePluginCreator (line 109) | PixelShufflePluginCreator() {
    method setPluginNamespace (line 140) | void setPluginNamespace(const char* pluginNamespace) noexcept override...

FILE: real-esrgan/general-x4v3/src/include/preprocess/preprocess.hpp
  type PreprocessStruct (line 4) | struct PreprocessStruct {

FILE: real-esrgan/x4plus/common.hpp
  function loadWeights (line 15) | std::map<std::string, Weights> loadWeights(const std::string file) {
  function ITensor (line 53) | ITensor* residualDenseBlock(INetworkDefinition *network, std::map<std::s...
  function ITensor (line 115) | ITensor* RRDB(INetworkDefinition *network, std::map<std::string, Weights...

FILE: real-esrgan/x4plus/gen_wts.py
  function main (line 8) | def main():

FILE: real-esrgan/x4plus/logging.h
  function class (line 32) | class LogStreamConsumerBuffer : public std::stringbuf
  function class (line 107) | class LogStreamConsumerBase
  function std (line 161) | static std::string severityPrefix(Severity severity)
  function TestResult (line 214) | enum class TestResult
  function LogStreamConsumer (line 448) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
  function LogStreamConsumer (line 460) | inline LogStreamConsumer LOG_INFO(const Logger& logger)
  function LogStreamConsumer (line 472) | inline LogStreamConsumer LOG_WARN(const Logger& logger)
  function LogStreamConsumer (line 484) | inline LogStreamConsumer LOG_ERROR(const Logger& logger)
  function LogStreamConsumer (line 497) | inline LogStreamConsumer LOG_FATAL(const Logger& logger)

FILE: real-esrgan/x4plus/postprocess.hpp
  type Postprocess (line 7) | struct Postprocess {
  type nvinfer1 (line 14) | namespace nvinfer1
    class PostprocessPluginV2 (line 16) | class PostprocessPluginV2 : public IPluginV2IOExt
      method PostprocessPluginV2 (line 19) | PostprocessPluginV2(const Postprocess& arg)
      method PostprocessPluginV2 (line 24) | PostprocessPluginV2(const void* data, size_t length)
      method PostprocessPluginV2 (line 31) | PostprocessPluginV2() = delete;
      method getNbOutputs (line 36) | int getNbOutputs() const noexcept override
      method Dims (line 41) | Dims getOutputDimensions(int index, const Dims* inputs, int nbInputD...
      method initialize (line 46) | int initialize() noexcept override
      method terminate (line 51) | void terminate() noexcept override
      method getWorkspaceSize (line 55) | size_t getWorkspaceSize(int maxBatchSize) const noexcept override
      method getSerializationSize (line 62) | size_t getSerializationSize() const noexcept override
      method serialize (line 69) | void serialize(void* buffer) const noexcept override
      method configurePlugin (line 77) | void configurePlugin(const PluginTensorDesc* in, int nbInput, const ...
      method supportsFormatCombination (line 82) | bool supportsFormatCombination(int pos, const PluginTensorDesc* inOu...
      method DataType (line 91) | DataType getOutputDataType(int index, const DataType* inputTypes, in...
      method destroy (line 107) | void destroy() noexcept override
      method IPluginV2Ext (line 112) | IPluginV2Ext* clone() const noexcept override
      method setPluginNamespace (line 118) | void setPluginNamespace(const char* libNamespace) noexcept override
      method isOutputBroadcastAcrossBatch (line 128) | bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* input...
      method canBroadcastInputAcrossBatch (line 133) | bool canBroadcastInputAcrossBatch(int inputIndex) const noexcept ove...
      method write (line 140) | void write(char*& buffer, const T& val) const
      method T (line 147) | T read(const char*& buffer) const
    class PostprocessPluginV2Creator (line 159) | class PostprocessPluginV2Creator : public IPluginCreator
      method PluginFieldCollection (line 172) | const PluginFieldCollection* getFieldNames() noexcept override
      method IPluginV2 (line 177) | IPluginV2* createPlugin(const char* name, const PluginFieldCollectio...
      method IPluginV2 (line 184) | IPluginV2* deserializePlugin(const char* name, const void* serialDat...
      method setPluginNamespace (line 191) | void setPluginNamespace(const char* libNamespace) noexcept override

FILE: real-esrgan/x4plus/preprocess.hpp
  type Preprocess (line 7) | struct Preprocess {
  type nvinfer1 (line 14) | namespace nvinfer1
    class PreprocessPluginV2 (line 16) | class PreprocessPluginV2 : public IPluginV2IOExt
      method PreprocessPluginV2 (line 19) | PreprocessPluginV2(const Preprocess& arg)
      method PreprocessPluginV2 (line 24) | PreprocessPluginV2(const void* data, size_t length)
      method PreprocessPluginV2 (line 31) | PreprocessPluginV2() = delete;
      method getNbOutputs (line 36) | int getNbOutputs() const noexcept override
      method Dims (line 41) | Dims getOutputDimensions(int index, const Dims* inputs, int nbInputD...
      method initialize (line 46) | int initialize() noexcept override
      method terminate (line 51) | void terminate() noexcept override
      method getWorkspaceSize (line 55) | size_t getWorkspaceSize(int maxBatchSize) const noexcept override
      method getSerializationSize (line 62) | size_t getSerializationSize() const noexcept override
      method serialize (line 69) | void serialize(void* buffer) const noexcept override
      method configurePlugin (line 77) | void configurePlugin(const PluginTensorDesc* in, int nbInput, const ...
      method supportsFormatCombination (line 82) | bool supportsFormatCombination(int pos, const PluginTensorDesc* inOu...
      method DataType (line 91) | DataType getOutputDataType(int index, const DataType* inputTypes, in...
      method destroy (line 107) | void destroy() noexcept override
      method IPluginV2Ext (line 112) | IPluginV2Ext* clone() const noexcept override
      method setPluginNamespace (line 118) | void setPluginNamespace(const char* libNamespace) noexcept override
      method isOutputBroadcastAcrossBatch (line 128) | bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* input...
      method canBroadcastInputAcrossBatch (line 133) | bool canBroadcastInputAcrossBatch(int inputIndex) const noexcept ove...
      method write (line 140) | void write(char*& buffer, const T& val) const
      method T (line 147) | T read(const char*& buffer) const
    class PreprocessPluginV2Creator (line 159) | class PreprocessPluginV2Creator : public IPluginCreator
      method PluginFieldCollection (line 172) | const PluginFieldCollection* getFieldNames() noexcept override
      method IPluginV2 (line 177) | IPluginV2* createPlugin(const char* name, const PluginFieldCollectio...
      method IPluginV2 (line 184) | IPluginV2* deserializePlugin(const char* name, const void* serialDat...
      method setPluginNamespace (line 191) | void setPluginNamespace(const char* libNamespace) noexcept override

FILE: real-esrgan/x4plus/real-esrgan.cpp
  function ICudaEngine (line 25) | ICudaEngine* build_engine(unsigned int maxBatchSize, IBuilder* builder, ...
  function APIToModel (line 131) | void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream, st...
  function doInference (line 150) | void doInference(IExecutionContext& context, cudaStream_t& stream, void ...
  function parse_args (line 157) | bool parse_args(int argc, char** argv, std::string& wts, std::string& en...
  function main (line 176) | int main(int argc, char** argv) {

FILE: real-esrgan/x4plus/utils.h
  function read_files_in_dir (line 7) | static inline int read_files_in_dir(const char *p_dir_name, std::vector<...

FILE: refinedet/calibrator.h
  function class (line 13) | class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2

FILE: refinedet/logging.h
  function class (line 31) | class LogStreamConsumerBuffer : public std::stringbuf
  function class (line 106) | class LogStreamConsumerBase
  function std (line 160) | static std::string severityPrefix(Severity severity)
  function TestResult (line 213) | enum class TestResult
  function LogStreamConsumer (line 448) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
  function LogStreamConsumer (line 460) | inline LogStreamConsumer LOG_INFO(const Logger& logger)
  function LogStreamConsumer (line 472) | inline LogStreamConsumer LOG_WARN(const Logger& logger)
  function LogStreamConsumer (line 484) | inline LogStreamConsumer LOG_ERROR(const Logger& logger)
  function LogStreamConsumer (line 497) | inline LogStreamConsumer LOG_FATAL(const Logger& logger)

FILE: refinedet/refinedet.cpp
  function RoiCorrect (line 22) | void RoiCorrect(const cv::Mat &m, cv::Rect &r)
  function loadWeights (line 39) | std::map<std::string, Weights> loadWeights(const std::string file) {
  function IScaleLayer (line 77) | IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::s...
  function ILayer (line 111) | ILayer* convRelu(INetworkDefinition *network, std::map<std::string, Weig...
  function ILayer (line 136) | ILayer* convRelu_extras(INetworkDefinition *network, std::map<std::strin...
  function IConvolutionLayer (line 155) | IConvolutionLayer* convReluconv_tcb0(INetworkDefinition *network, std::m...
  function ILayer (line 187) | ILayer* ReluconvRelu_tcb2(INetworkDefinition *network, std::map<std::str...
  function ILayer (line 208) | ILayer* conv_permutation(INetworkDefinition *network, std::map<std::stri...
  function ILayer (line 225) | ILayer* cat_4_tensor(INetworkDefinition *network, ILayer*tensor_0, ILaye...
  function ILayer (line 267) | ILayer* reshapeSoftmax(INetworkDefinition *network, ITensor& input, int ...
  function IScaleLayer (line 294) | IScaleLayer* L2norm(INetworkDefinition *network, std::map<std::string, W...
  function ILayer (line 359) | ILayer* convBnLeaky(INetworkDefinition *network, std::map<std::string, W...
  function ICudaEngine (line 375) | ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, ...
  function APIToModel (line 536) | void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream) {
  function PriorBox (line 553) | torch::Tensor PriorBox()
  function decode (line 600) | torch::Tensor decode(const torch::Tensor _loc,torch::Tensor _prior,bool ...
  function center (line 621) | torch::Tensor center(torch::Tensor retv)
  function nms (line 632) | bool nms(const torch::Tensor& boxes, const torch::Tensor& scores, torch:...
  function doInference (line 697) | void doInference(IExecutionContext& context, void* buffers[], cudaStream...
  function base_transform (line 822) | void base_transform(const cv::Mat &m_src,float *data)
  function main (line 841) | int main(int argc, char** argv) {

FILE: refinedet/utils.h
  function namespace (line 24) | namespace Tn
  function read_files_in_dir (line 41) | static inline int read_files_in_dir(const char *p_dir_name, std::vector<...

FILE: repvgg/gen_wts.py
  function main (line 7) | def main(args):

FILE: repvgg/logging.h
  function class (line 9) | class Logger : public nvinfer1::ILogger

FILE: repvgg/repvgg.cpp
  function loadWeights (line 84) | std::map<std::string, Weights> loadWeights(const std::string file)
  function IActivationLayer (line 123) | IActivationLayer *RepVGGBlock(INetworkDefinition *network, std::map<std:...
  function IActivationLayer (line 135) | IActivationLayer *makeStage(INetworkDefinition *network, std::map<std::s...
  function ICudaEngine (line 152) | ICudaEngine *createEngine(std::string netName, unsigned int maxBatchSize...
  function APIToModel (line 209) | void APIToModel(std::string netName, unsigned int maxBatchSize, IHostMem...
  function doInference (line 228) | void doInference(IExecutionContext &context, float *input, float *output...
  function main (line 262) | int main(int argc, char **argv)

FILE: resnet/logging.h
  function class (line 31) | class LogStreamConsumerBuffer : public std::stringbuf
  function class (line 106) | class LogStreamConsumerBase
  function std (line 160) | static std::string severityPrefix(Severity severity)
  function TestResult (line 213) | enum class TestResult
  function LogStreamConsumer (line 447) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
  function LogStreamConsumer (line 459) | inline LogStreamConsumer LOG_INFO(const Logger& logger)
  function LogStreamConsumer (line 471) | inline LogStreamConsumer LOG_WARN(const Logger& logger)
  function LogStreamConsumer (line 483) | inline LogStreamConsumer LOG_ERROR(const Logger& logger)
  function LogStreamConsumer (line 496) | inline LogStreamConsumer LOG_FATAL(const Logger& logger)

FILE: resnet/resnet18.cpp
  function loadWeights (line 38) | std::map<std::string, Weights> loadWeights(const std::string file)
  function IScaleLayer (line 77) | IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::s...
  function IActivationLayer (line 111) | IActivationLayer* basicBlock(INetworkDefinition *network, std::map<std::...
  function ICudaEngine (line 146) | ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, ...
  function APIToModel (line 213) | void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream)
  function doInference (line 232) | void doInference(IExecutionContext& context, float* input, float* output...
  function main (line 266) | int main(int argc, char** argv)

FILE: resnet/resnet34.cpp
  function loadWeights (line 38) | std::map<std::string, Weights> loadWeights(const std::string file)
  function IScaleLayer (line 78) | IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::s...
  function IActivationLayer (line 112) | IActivationLayer* basicBlock(INetworkDefinition* network, std::map<std::...
  function ICudaEngine (line 150) | ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, ...
  function APIToModel (line 217) | void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream)
  function doInference (line 236) | void doInference(IExecutionContext& context, float* input, float* output...
  function main (line 271) | int main(int argc, char** argv)

FILE: resnet/resnet50.cpp
  function loadWeights (line 38) | std::map<std::string, Weights> loadWeights(const std::string file)
  function IScaleLayer (line 77) | IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::s...
  function IActivationLayer (line 111) | IActivationLayer* bottleneck(INetworkDefinition *network, std::map<std::...
  function ICudaEngine (line 154) | ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, ...
  function APIToModel (line 231) | void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream)
  function doInference (line 250) | void doInference(IExecutionContext& context, float* input, float* output...
  function main (line 284) | int main(int argc, char** argv)

FILE: resnet/resnet50.py
  function load_weights (line 25) | def load_weights(file):
  function addBatchNorm2d (line 49) | def addBatchNorm2d(network, weight_map, input, layer_name, eps):
  function bottleneck (line 64) | def bottleneck(network, weight_map, input, in_channels, out_channels, st...
  function create_engine (line 141) | def create_engine(maxBatchSize, builder, config, dt):
  function APIToModel (line 233) | def APIToModel(maxBatchSize):
  function doInference (line 245) | def doInference(context, host_in, host_out, batchSize):

FILE: resnet/resnext50_32x4d.cpp
  function loadWeights (line 38) | std::map<std::string, Weights> loadWeights(const std::string file)
  function IScaleLayer (line 77) | IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::s...
  function IActivationLayer (line 111) | IActivationLayer* bottleneck(INetworkDefinition *network, std::map<std::...
  function ICudaEngine (line 157) | ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, ...
  function APIToModel (line 233) | void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream)
  function doInference (line 252) | void doInference(IExecutionContext& context, float* input, float* output...
  function main (line 286) | int main(int argc, char** argv)

FILE: resnet/wide_resnet50.py
  function load_weights (line 26) | def load_weights(file):
  function addBatchNorm2d (line 50) | def addBatchNorm2d(network, weight_map, inputs, layer_name, eps):
  function bottleneck (line 66) | def bottleneck(network, weight_map, input, in_channels, out_channels, st...
  function create_engine (line 135) | def create_engine(maxBatchSize, builder, config, dt):
  function APIToModel (line 213) | def APIToModel(maxBatchSize):
  function doInference (line 225) | def doInference(context, host_in, host_out, batchSize):

FILE: resnet/wideresnet50.cpp
  function loadWeights (line 38) | std::map<std::string, Weights> loadWeights(const std::string file)
  function IScaleLayer (line 77) | IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::s...
  function IActivationLayer (line 111) | IActivationLayer* bottleneck(INetworkDefinition *network, std::map<std::...
  function ICudaEngine (line 154) | ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, ...
  function APIToModel (line 230) | void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream)
  function doInference (line 249) | void doInference(IExecutionContext& context, float* input, float* output...
  function main (line 283) | int main(int argc, char** argv)

FILE: retinaface/calibrator.h
  function class (line 14) | class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2

FILE: retinaface/common.hpp
  function preprocess_img (line 21) | static inline cv::Mat preprocess_img(cv::Mat& img, int input_w, int inpu...
  function read_files_in_dir (line 43) | static inline int read_files_in_dir(const char *p_dir_name, std::vector<...
  function get_rect_adapt_landmark (line 65) | static inline cv::Rect get_rect_adapt_landmark(cv::Mat& img, int input_w...
  function iou (line 91) | static float iou(float lbox[4], float rbox[4]) {
  function cmp (line 106) | static bool cmp(const decodeplugin::Detection& a, const decodeplugin::De...
  function nms (line 110) | static inline void nms(std::vector<decodeplugin::Detection>& res, float ...
  function loadWeights (line 135) | static inline std::map<std::string, Weights> loadWeights(const std::stri...
  function Weights (line 173) | static inline Weights getWeights(std::map<std::string, Weights>& weightM...
  function IScaleLayer (line 181) | static inline IScaleLayer* addBatchNorm2d(INetworkDefinition *network, s...

FILE: retinaface/decode.h
  function namespace (line 9) | namespace decodeplugin
  function class (line 22) | class DecodePlugin: public IPluginV2IOExt

FILE: retinaface/logging.h
  function class (line 33) | class LogStreamConsumerBuffer : public std::stringbuf
  function class (line 108) | class LogStreamConsumerBase
  function std (line 162) | static std::string severityPrefix(Severity severity)
  function TestResult (line 215) | enum class TestResult
  function LogStreamConsumer (line 449) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
  function LogStreamConsumer (line 461) | inline LogStreamConsumer LOG_INFO(const Logger& logger)
  function LogStreamConsumer (line 473) | inline LogStreamConsumer LOG_WARN(const Logger& logger)
  function LogStreamConsumer (line 485) | inline LogStreamConsumer LOG_ERROR(const Logger& logger)
  function LogStreamConsumer (line 498) | inline LogStreamConsumer LOG_FATAL(const Logger& logger)

FILE: retinaface/retina_mnet.cpp
  function ILayer (line 27) | ILayer* conv_bn(INetworkDefinition *network, std::map<std::string, Weigh...
  function ILayer (line 40) | ILayer* conv_bn_no_relu(INetworkDefinition *network, std::map<std::strin...
  function ILayer (line 50) | ILayer* conv_bn1X1(INetworkDefinition *network, std::map<std::string, We...
  function ILayer (line 63) | ILayer* conv_dw(INetworkDefinition *network, std::map<std::string, Weigh...
  function IActivationLayer (line 83) | IActivationLayer* ssh(INetworkDefinition *network, std::map<std::string,...
  function ICudaEngine (line 97) | ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, ...
  function APIToModel (line 225) | void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream) {
  function doInference (line 243) | void doInference(IExecutionContext& context, float* input, float* output...
  function main (line 276) | int main(int argc, char** argv) {

FILE: retinaface/retina_r50.cpp
  function IActivationLayer (line 27) | IActivationLayer* bottleneck(INetworkDefinition *network, std::map<std::...
  function ILayer (line 69) | ILayer* conv_bn_relu(INetworkDefinition *network, std::map<std::string, ...
  function IActivationLayer (line 87) | IActivationLayer* ssh(INetworkDefinition *network, std::map<std::string,...
  function ICudaEngine (line 101) | ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, ...
  function APIToModel (line 244) | void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream) {
  function doInference (line 262) | void doInference(IExecutionContext& context, float* input, float* output...
  function main (line 295) | int main(int argc, char** argv) {

FILE: retinaface/retinaface_trt.py
  function plot_one_box (line 26) | def plot_one_box(x, landmark,img, color=None, label=None, line_thickness...
  class Retinaface_trt (line 71) | class Retinaface_trt(object):
    method __init__ (line 76) | def __init__(self, engine_file_path):
    method infer (line 120) | def infer(self, input_image_path):
    method destroy (line 177) | def destroy(self):
    method preprocess_image (line 181) | def preprocess_image(self, input_image_path):
    method xywh2xyxy (line 229) | def xywh2xyxy(self, origin_h, origin_w, x,landmark):
    method post_process (line 271) | def post_process(self, output, origin_h, origin_w):
  class myThread (line 312) | class myThread(threading.Thread):
    method __init__ (line 313) | def __init__(self, func, args):
    method run (line 318) | def run(self):

FILE: retinafaceAntiCov/decode.h
  function namespace (line 12) | namespace decodeplugin
  function class (line 34) | class DecodePlugin: public IPluginV2IOExt

FILE: retinafaceAntiCov/logging.h
  function class (line 31) | class LogStreamConsumerBuffer : public std::stringbuf
  function class (line 106) | class LogStreamConsumerBase
  function std (line 160) | static std::string severityPrefix(Severity severity)
  function TestResult (line 213) | enum class TestResult
  function LogStreamConsumer (line 447) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
  function LogStreamConsumer (line 459) | inline LogStreamConsumer LOG_INFO(const Logger& logger)
  function LogStreamConsumer (line 471) | inline LogStreamConsumer LOG_WARN(const Logger& logger)
  function LogStreamConsumer (line 483) | inline LogStreamConsumer LOG_ERROR(const Logger& logger)
  function LogStreamConsumer (line 496) | inline LogStreamConsumer LOG_FATAL(const Logger& logger)

FILE: retinafaceAntiCov/retinafaceAntiCov.cpp
  function preprocess_img (line 41) | cv::Mat preprocess_img(cv::Mat& img) {
  function get_rect_adapt_landmark (line 63) | cv::Rect get_rect_adapt_landmark(cv::Mat& img, float bbox[4], float lmk[...
  function iou (line 89) | float iou(float lbox[4], float rbox[4]) {
  function cmp (line 104) | bool cmp(decodeplugin::Detection& a, decodeplugin::Detection& b) {
  function nms (line 108) | void nms(std::vector<decodeplugin::Detection>& res, float *output, float...
  function loadWeights (line 133) | std::map<std::string, Weights> loadWeights(const std::string file) {
  function IScaleLayer (line 171) | IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::s...
  function ILayer (line 204) | ILayer* convBnRelu(INetworkDefinition *network, std::map<std::string, We...
  function ILayer (line 217) | ILayer* convBiasBnRelu(INetworkDefinition *network, std::map<std::string...
  function ILayer (line 228) | ILayer* head(INetworkDefinition *network, std::map<std::string, Weights>...
  function ILayer (line 256) | ILayer* reshapeSoftmax(INetworkDefinition *network, ITensor& input, int ...
  function ICudaEngine (line 272) | ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, ...
  function APIToModel (line 405) | void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream) {
  function doInference (line 422) | void doInference(IExecutionContext& context, float* input, float* output...
  function read_files_in_dir (line 455) | int read_files_in_dir(const char *p_dir_name, std::vector<std::string> &...
  function main (line 477) | int main(int argc, char** argv) {

FILE: scaled-yolov4/common.hpp
  function preprocess_img (line 14) | cv::Mat preprocess_img(cv::Mat& img) {
  function get_rect (line 36) | cv::Rect get_rect(cv::Mat& img, float bbox[4]) {
  function iou (line 62) | float iou(float lbox[4], float rbox[4]) {
  function cmp (line 77) | bool cmp(const Yolo::Detection& a, const Yolo::Detection& b) {
  function nms (line 81) | void nms(std::vector<Yolo::Detection>& res, float *output, float conf_th...
  function loadWeights (line 110) | std::map<std::string, Weights> loadWeights(const std::string file) {
  function IScaleLayer (line 148) | IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::s...
  function ILayer (line 181) | ILayer* convBnMish(INetworkDefinition *network, std::map<std::string, We...

FILE: scaled-yolov4/logging.h
  function class (line 31) | class LogStreamConsumerBuffer : public std::stringbuf
  function class (line 108) | class LogStreamConsumerBase
  function std (line 162) | static std::string severityPrefix(Severity severity)
  function TestResult (line 215) | enum class TestResult
  function LogStreamConsumer (line 451) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
  function LogStreamConsumer (line 463) | inline LogStreamConsumer LOG_INFO(const Logger& logger)
  function LogStreamConsumer (line 475) | inline LogStreamConsumer LOG_WARN(const Logger& logger)
  function LogStreamConsumer (line 487) | inline LogStreamConsumer LOG_ERROR(const Logger& logger)
  function LogStreamConsumer (line 500) | inline LogStreamConsumer LOG_FATAL(const Logger& logger)

FILE: scaled-yolov4/mish.h
  function class (line 10) | class MishPlugin: public IPluginV2IOExt

FILE: scaled-yolov4/utils.h
  function namespace (line 22) | namespace Tn

FILE: scaled-yolov4/yololayer.h
  function namespace (line 8) | namespace Yolo
  function class (line 53) | class YoloLayerPlugin: public IPluginV2IOExt

FILE: scaled-yolov4/yolov4_csp.cpp
  function ICudaEngine (line 28) | ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, ...
  function APIToModel (line 331) | void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream) {
  function doInference (line 351) | void doInference(IExecutionContext& context, float* input, float* output...
  function read_files_in_dir (line 384) | int read_files_in_dir(const char* p_dir_name, std::vector<std::string> &...
  function main (line 402) | int main(int argc, char** argv){

FILE: senet/logging.h
  function class (line 31) | class LogStreamConsumerBuffer : public std::stringbuf
  function class (line 106) | class LogStreamConsumerBase
  function std (line 160) | static std::string severityPrefix(Severity severity)
  function TestResult (line 213) | enum class TestResult
  function LogStreamConsumer (line 447) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
  function LogStreamConsumer (line 459) | inline LogStreamConsumer LOG_INFO(const Logger& logger)
  function LogStreamConsumer (line 471) | inline LogStreamConsumer LOG_WARN(const Logger& logger)
  function LogStreamConsumer (line 483) | inline LogStreamConsumer LOG_ERROR(const Logger& logger)
  function LogStreamConsumer (line 496) | inline LogStreamConsumer LOG_FATAL(const Logger& logger)

FILE: senet/se_resnet50.cpp
  function loadWeights (line 38) | std::map<std::string, Weights> loadWeights(const std::string file)
  function IScaleLayer (line 77) | IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::s...
  function ILayer (line 111) | ILayer* seLayer(INetworkDefinition *network, std::map<std::string, Weigh...
  function IActivationLayer (line 124) | IActivationLayer* bottleneck(INetworkDefinition *network, std::map<std::...
  function ICudaEngine (line 169) | ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, ...
  function APIToModel (line 245) | void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream)
  function doInference (line 264) | void doInference(IExecutionContext& context, float* input, float* output...
  function main (line 298) | int main(int argc, char** argv)

FILE: shufflenetv2/gen_wts.py
  function read_imagenet_labels (line 14) | def read_imagenet_labels() -> dict[int, str]:
  function preprocess (line 29) | def preprocess(img: np.array) -> torch.Tensor:

FILE: shufflenetv2/logging.h
  function class (line 32) | class LogStreamConsumerBuffer : public std::stringbuf {
  function class (line 91) | class LogStreamConsumerBase {
  function std (line 137) | static std::string severityPrefix(Severity severity) {
  function TestResult (line 191) | enum class TestResult {
  function LogStreamConsumer (line 405) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) {
  function LogStreamConsumer (line 416) | inline LogStreamConsumer LOG_INFO(const Logger& logger) {
  function LogStreamConsumer (line 427) | inline LogStreamConsumer LOG_WARN(const Logger& logger) {
  function LogStreamConsumer (line 438) | inline LogStreamConsumer LOG_ERROR(const Logger& logger) {
  function LogStreamConsumer (line 450) | inline LogStreamConsumer LOG_FATAL(const Logger& logger) {

FILE: shufflenetv2/shufflenetv2.cpp
  type ShuffleNetV2Params (line 12) | struct ShuffleNetV2Params {
  function Dims (line 52) | Dims debug_shape(const ILayer* l) {
  function ILayer (line 62) | ILayer* addBatchNorm2d(INetworkDefinition* network, WeightMap& weightMap...
  function ILayer (line 107) | ILayer* CBR(INetworkDefinition* network, WeightMap& m, ITensor& input, c...
  function ILayer (line 145) | ILayer* invertedRes(INetworkDefinition* net, WeightMap& m, ITensor& inpu...
  function ICudaEngine (line 215) | ICudaEngine* createEngine(int32_t N, IRuntime* runtime, IBuilder* builde...
  function APIToModel (line 298) | void APIToModel(int32_t N, IRuntime* runtime, IHostMemory** modelStream) {
  function doInference (line 321) | auto doInference(IExecutionContext& context, void* input, int64_t batchS...
  function main (line 379) | int main(int argc, char** argv) {

FILE: shufflenetv2/utils.h
  function cudaDeviceProp (line 28) | cudaDeviceProp prop{}
  function std (line 90) | static std::vector<float> preprocess_img(cv::Mat& img, bool bgr2rgb, con...
  function loadImagenetLabelMap (line 137) | int, std::string> loadImagenetLabelMap(const std::string& path) {
  type ScaleParams (line 165) | struct ScaleParams {
  function Weights (line 176) | const Weights scale{DataType::kFLOAT, params->scale.data(), 3ll};
  function getSize (line 233) | static size_t getSize(DataType dt) {

FILE: squeezenet/gen_wts.py
  function read_imagenet_labels (line 9) | def read_imagenet_labels() -> dict[int, str]:
  function preprocess (line 24) | def preprocess(img: np.array) -> torch.Tensor:
  function main (line 43) | def main():

FILE: squeezenet/logging.h
  function class (line 32) | class LogStreamConsumerBuffer : public std::stringbuf {
  function class (line 91) | class LogStreamConsumerBase {
  function std (line 137) | static std::string severityPrefix(Severity severity) {
  function TestResult (line 191) | enum class TestResult {
  function LogStreamConsumer (line 405) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) {
  function LogStreamConsumer (line 416) | inline LogStreamConsumer LOG_INFO(const Logger& logger) {
  function LogStreamConsumer (line 427) | inline LogStreamConsumer LOG_WARN(const Logger& logger) {
  function LogStreamConsumer (line 438) | inline LogStreamConsumer LOG_ERROR(const Logger& logger) {
  function LogStreamConsumer (line 450) | inline LogStreamConsumer LOG_FATAL(const Logger& logger) {

FILE: squeezenet/squeezenet.cpp
  function ILayer (line 31) | ILayer* fire(INetworkDefinition* network, WeightMap& m, ITensor& input, ...
  function ICudaEngine (line 58) | ICudaEngine* createEngine(int32_t N, IRuntime* runtime, IBuilder* builde...
  function APIToModel (line 149) | void APIToModel(int32_t N, IRuntime* runtime, IHostMemory** modelStream) {
  function doInference (line 172) | std::vector<std::vector<float>> doInference(IExecutionContext& context, ...
  function main (line 230) | int main(int argc, char** argv) {

FILE: squeezenet/utils.h
  function cudaDeviceProp (line 34) | cudaDeviceProp prop{}
  function std (line 94) | static std::vector<float> preprocess_img(cv::Mat& img, bool bgr2rgb, con...
  function loadImagenetLabelMap (line 140) | int, std::string> loadImagenetLabelMap(const std::string& path) {
  type ScaleParams (line 168) | struct ScaleParams {
  function Weights (line 178) | const Weights shift{DataType::kFLOAT, params->shift.data(), 3ll};
  function getSize (line 238) | static size_t getSize(DataType dt) {

FILE: superpoint/logging.h
  function class (line 31) | class LogStreamConsumerBuffer : public std::stringbuf
  function class (line 104) | class LogStreamConsumerBase
  function std (line 156) | static std::string severityPrefix(Severity severity)
  function TestResult (line 216) | enum class TestResult
  function LogStreamConsumer (line 461) | inline LogStreamConsumer LOG_VERBOSE(const Logger &logger)
  function LogStreamConsumer (line 473) | inline LogStreamConsumer LOG_INFO(const Logger &logger)
  function LogStreamConsumer (line 485) | inline LogStreamConsumer LOG_WARN(const Logger &logger)
  function LogStreamConsumer (line 497) | inline LogStreamConsumer LOG_ERROR(const Logger &logger)
  function LogStreamConsumer (line 510) | inline LogStreamConsumer LOG_FATAL(const Logger &logger)

FILE: superpoint/supernet.cpp
  function ICudaEngine (line 28) | ICudaEngine *createEngine(IBuilder *builder, IBuilderConfig *config, std...
  function APIToModel (line 161) | void APIToModel(std::string path, IHostMemory **modelStream)
  function main (line 179) | int main(int argc, char **argv)

FILE: superpoint/utils.cpp
  function loadWeights (line 8) | std::map<std::string, Weights> loadWeights(const std::string file)
  function read_files_in_dir (line 46) | int read_files_in_dir(const char *p_dir_name, std::vector<std::string> &...
  function tokenize (line 72) | void tokenize(const std::string &str, std::vector<std::string> &tokens, ...

FILE: swin-transformer/semantic-segmentation/UpsamplePlugin.cpp
  function writeToBuffer (line 23) | void writeToBuffer(char*& buffer, const T& val)
  function T (line 31) | T readFromBuffer(const char*& buffer)
  function Dims (line 81) | Dims UpsamplePlugin::getOutputDimensions(int index, const Dims* inputs, ...
  function IPluginV2 (line 166) | IPluginV2* UpsamplePlugin::clone() const
  function PluginFieldCollection (line 198) | const PluginFieldCollection* UpsamplePluginCreator::getFieldNames()
  function IPluginV2 (line 203) | IPluginV2* UpsamplePluginCreator::createPlugin(const char* name, const P...
  function IPluginV2 (line 222) | IPluginV2* UpsamplePluginCreator::deserializePlugin(const char* name, co...

FILE: swin-transformer/semantic-segmentation/UpsamplePlugin.h
  function getNbOutputs (line 21) | int getNbOutputs() const override;
  function getSerializationSize (line 33) | size_t getSerializationSize() const override;

FILE: swin-transformer/semantic-segmentation/common.hpp
  function mblobFromImages (line 29) | void mblobFromImages(cv::InputArrayOfArrays images_, cv::OutputArray blob_,
  function BlobFromImages (line 116) | cv::Mat BlobFromImages(cv::InputArrayOfArrays images, cv::Size size,
  function debug_print (line 124) | void debug_print(ITensor *input_tensor,std::string head)
  function loadWeights (line 135) | std::map<std::string, Weights> loadWeights(const std::string file) {
  function ITensor (line 173) | ITensor* m_layerNorm(INetworkDefinition *m_Network,std::map<std::string,...
  function ITensor (line 205) | ITensor* layerNorm(INetworkDefinition *m_Network,std::map<std::string, W...
  function ITensor (line 262) | ITensor* conv(INetworkDefinition *m_Network,std::map<std::string, Weight...
  function ITensor (line 275) | ITensor* shuffle_reshape(INetworkDefinition *m_Network,ITensor *input,Di...
  function ITensor (line 282) | ITensor* shuffle_permute(INetworkDefinition *m_Network,ITensor *input,Pe...
  function ITensor (line 289) | ITensor* shuffle_reshapeApermute(INetworkDefinition *m_Network,ITensor *...
  function ITensor (line 301) | ITensor* trt_transform_imgMask(INetworkDefinition *m_Network,int hw, int...
  function ITensor (line 362) | ITensor* trt_transform_pad(INetworkDefinition *m_Network,ITensor *input,...
  function ITensor (line 402) | ITensor* trt_swinRoll(INetworkDefinition *m_Network,ITensor *input,vecto...
  function ITensor (line 495) | ITensor* trt_transform_window_partition(INetworkDefinition *m_Network,IT...
  function ITensor (line 516) | ITensor* trt_swinLinear(INetworkDefinition *m_Network,std::map<std::stri...
  function ITensor (line 570) | ITensor* trt_trainsform_WindowAttention(INetworkDefinition *m_Network,st...
  function ITensor (line 684) | ITensor* trt_window_reverse(INetworkDefinition *m_Network, ITensor *inpu...
  function ITensor (line 695) | ITensor* gelu(INetworkDefinition *m_Network,ITensor *input)
  function ITensor (line 713) | ITensor* trt_transform_mlp(INetworkDefinition *m_Network,std::map<std::s...
  function ITensor (line 725) | ITensor* blk(INetworkDefinition *m_Network,std::map<std::string, Weights...
  function ITensor (line 773) | ITensor* downsample(INetworkDefinition* m_Network,std::map<std::string, ...
  function ITensor (line 791) | ITensor* addBatchNorm2d(
  function ITensor (line 829) | ITensor* transform_lateral_conv(INetworkDefinition* m_Network,std::map<s...
  function ITensor (line 841) | ITensor* resize(INetworkDefinition* m_Network, ITensor* input, int grid)
  function ITensor (line 867) | ITensor* transform_psp(INetworkDefinition* m_Network,std::map<std::strin...
  function ITensor (line 880) | ITensor* up_Add(INetworkDefinition* m_Network,ITensor* input1,ITensor* i...

FILE: swin-transformer/semantic-segmentation/fillmask.h
  function namespace (line 12) | namespace nvinfer1

FILE: swin-transformer/semantic-segmentation/gelu.h
  function namespace (line 11) | namespace nvinfer1

FILE: swin-transformer/semantic-segmentation/include/dirent.h
  type _wdirent (line 222) | struct _wdirent {
  type _wdirent (line 241) | typedef struct _wdirent _wdirent;
  type _WDIR (line 243) | struct _WDIR {
  type _WDIR (line 259) | typedef struct _WDIR _WDIR;
  type dirent (line 262) | struct dirent {
  type dirent (line 281) | typedef struct dirent dirent;
  type DIR (line 283) | struct DIR {
  type DIR (line 287) | typedef struct DIR DIR;
  type dirent (line 294) | struct dirent
  type _wdirent (line 295) | struct _wdirent
  type dirent (line 298) | struct dirent
  type dirent (line 298) | struct dirent
  type _wdirent (line 300) | struct _wdirent
  type _wdirent (line 300) | struct _wdirent
  type dirent (line 308) | struct dirent
  type dirent (line 309) | struct dirent
  type dirent (line 310) | struct dirent
  type dirent (line 310) | struct dirent
  type dirent (line 312) | struct dirent
  type dirent (line 312) | struct dirent
  type dirent (line 314) | struct dirent
  type dirent (line 314) | struct dirent
  function _WDIR (line 352) | static _WDIR*
  type _wdirent (line 453) | struct _wdirent
  type _wdirent (line 457) | struct _wdirent
  function _wreaddir_r (line 475) | static int
  function _wclosedir (line 537) | static int
  function _wrewinddir (line 570) | static void
  function WIN32_FIND_DATAW (line 586) | static WIN32_FIND_DATAW*
  function WIN32_FIND_DATAW (line 637) | static WIN32_FIND_DATAW*
  function DIR (line 676) | static DIR*
  type dirent (line 732) | struct dirent
  type dirent (line 736) | struct dirent
  function readdir_r (line 754) | static int
  function closedir (line 843) | static int
  function rewinddir (line 870) | static void
  function scandir (line 881) | static int
  function alphasort (line 1018) | static int
  function versionsort (line 1026) | static int
  function dirent_mbstowcs_s (line 1035) | static int
  function dirent_wcstombs_s (line 1087) | static int
  function dirent_set_errno (line 1139) | static void

FILE: swin-transformer/semantic-segmentation/layerNorm.h
  type welford (line 15) | struct welford
  function namespace (line 22) | namespace nvinfer1{

FILE: swin-transformer/semantic-segmentation/logging.h
  function class (line 31) | class LogStreamConsumerBuffer : public std::stringbuf
  function class (line 106) | class LogStreamConsumerBase
  function std (line 160) | static std::string severityPrefix(Severity severity)
  function TestResult (line 213) | enum class TestResult
  function LogStreamConsumer (line 447) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
  function LogStreamConsumer (line 459) | inline LogStreamConsumer LOG_INFO(const Logger& logger)
  function LogStreamConsumer (line 471) | inline LogStreamConsumer LOG_WARN(const Logger& logger)
  function LogStreamConsumer (line 483) | inline LogStreamConsumer LOG_ERROR(const Logger& logger)
  function LogStreamConsumer (line 496) | inline LogStreamConsumer LOG_FATAL(const Logger& logger)

FILE: swin-transformer/semantic-segmentation/trainsform.cpp
  function ICudaEngine (line 27) | ICudaEngine *createEngine(unsigned int maxBatchSize, IBuilder *builder, ...
  function APIToModel (line 137) | void APIToModel(unsigned int maxBatchSize, IHostMemory **modelStream,std...
  function createEng (line 148) | void createEng(std::string wtsPath, std::string engine_name)
  function inference_init (line 177) | void inference_init(string ENGPath,ICudaEngine *m_engine)
  function doInference (line 208) | void doInference(const float *input, int *output)
  function main (line 223) | int main(int argc, char** argv)

FILE: swin-transformer/semantic-segmentation/utilsn.h
  function namespace (line 25) | namespace Tn

FILE: tsm/gen_wts.py
  function write_one_weight (line 8) | def write_one_weight(writer, name, weight):
  function convert_name (line 19) | def convert_name(name):
  function main (line 28) | def main(args):

FILE: tsm/logging.h
  function class (line 31) | class LogStreamConsumerBuffer : public std::stringbuf
  function class (line 106) | class LogStreamConsumerBase
  function std (line 160) | static std::string severityPrefix(Severity severity)
  function TestResult (line 213) | enum class TestResult
  function LogStreamConsumer (line 447) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
  function LogStreamConsumer (line 459) | inline LogStreamConsumer LOG_INFO(const Logger& logger)
  function LogStreamConsumer (line 471) | inline LogStreamConsumer LOG_WARN(const Logger& logger)
  function LogStreamConsumer (line 483) | inline LogStreamConsumer LOG_ERROR(const Logger& logger)
  function LogStreamConsumer (line 496) | inline LogStreamConsumer LOG_FATAL(const Logger& logger)

FILE: tsm/test_shift.py
  function shift_mit (line 12) | def shift_mit(x, num_segments, shift_div=8):
  function shift_mmaction2 (line 32) | def shift_mmaction2(x, num_segments, shift_div=8):
  function _tensorrt_shift_module (line 79) | def _tensorrt_shift_module(network,
  function shift_tensorrt (line 152) | def shift_tensorrt(x, num_segments, shift_div, input_shape):

FILE: tsm/tsm_r50.cpp
  function loadWeights (line 43) | std::map<std::string, Weights> loadWeights(const std::string file)
  function print (line 82) | void print(char* name, ITensor* tensor) {
  function IConcatenationLayer (line 87) | IConcatenationLayer* addShift(INetworkDefinition *network, ITensor& inpu...
  function IScaleLayer (line 117) | IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::s...
  function IActivationLayer (line 150) | IActivationLayer* bottleneck(INetworkDefinition *network, std::map<std::...
  function ICudaEngine (line 195) | ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, ...
  function APIToModel (line 285) | void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream)
  function doInference (line 302) | void doInference(IExecutionContext& context, float* input, float* output...
  function main (line 336) | int main(int argc, char** argv)

FILE: tsm/tsm_r50.py
  function load_weights (line 27) | def load_weights(file):
  function add_shift_module (line 51) | def add_shift_module(network, input, input_shape, num_segments=8, shift_...
  function add_batch_norm_2d (line 107) | def add_batch_norm_2d(network, weight_map, input, layer_name, eps):
  function bottleneck (line 122) | def bottleneck(network, weight_map, input, in_channels, out_channels, st...
  function create_engine (line 202) | def create_engine(maxBatchSize, builder, dt, weights):
  function do_inference (line 317) | def do_inference(context, host_in, host_out, batchSize):
  function inference_mmaction2 (line 331) | def inference_mmaction2(inputs, config, checkpoint):
  function main (line 349) | def main(args):

FILE: ufld/common.hpp
  function loadWeights (line 30) | std::map<std::string, Weights> loadWeights(const std::string file) {
  function IScaleLayer (line 68) | IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::s...
  function ILayer (line 101) | ILayer* convBnLeaky( INetworkDefinition *network, std::map<std::string, ...
  function IActivationLayer (line 127) | IActivationLayer* basicBlock(INetworkDefinition *network, std::map<std::...
  function read_files_in_dir (line 162) | int read_files_in_dir(const char *p_dir_name, std::vector<std::string> &...

FILE: ufld/lane_det.cpp
  function ICudaEngine (line 24) | ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder,I...
  function APIToModel (line 143) | void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream) {
  function doInference (line 159) | void doInference(IExecutionContext& context, float* input, float* output...
  function prepareImage (line 194) | std::vector<float> prepareImage(cv::Mat & img)
  function softmax_mul (line 224) | void softmax_mul(float* x, float* y, int rows, int cols, int chan)
  function argmax (line 251) | void argmax(float* x, float* y, int rows, int cols, int chan)
  function main (line 272) | int main(int argc, char** argv)

FILE: ufld/logging.h
  function class (line 16) | class LogStreamConsumerBuffer : public std::stringbuf
  function class (line 91) | class LogStreamConsumerBase
  function std (line 145) | static std::string severityPrefix(Severity severity)
  function TestResult (line 198) | enum class TestResult
  function LogStreamConsumer (line 432) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
  function LogStreamConsumer (line 444) | inline LogStreamConsumer LOG_INFO(const Logger& logger)
  function LogStreamConsumer (line 456) | inline LogStreamConsumer LOG_WARN(const Logger& logger)
  function LogStreamConsumer (line 468) | inline LogStreamConsumer LOG_ERROR(const Logger& logger)
  function LogStreamConsumer (line 481) | inline LogStreamConsumer LOG_FATAL(const Logger& logger)

FILE: unet/common.hpp
  function loadWeights (line 26) | std::map<std::string, Weights> loadWeights(const std::string file) {
  function IScaleLayer (line 64) | IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::s...

FILE: unet/gen_wts.py
  function main (line 5) | def main():

FILE: unet/logging.h
  function class (line 32) | class LogStreamConsumerBuffer : public std::stringbuf
  function class (line 107) | class LogStreamConsumerBase
  function std (line 161) | static std::string severityPrefix(Severity severity)
  function TestResult (line 214) | enum class TestResult
  function LogStreamConsumer (line 448) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
  function LogStreamConsumer (line 460) | inline LogStreamConsumer LOG_INFO(const Logger& logger)
  function LogStreamConsumer (line 472) | inline LogStreamConsumer LOG_WARN(const Logger& logger)
  function LogStreamConsumer (line 484) | inline LogStreamConsumer LOG_ERROR(const Logger& logger)
  function LogStreamConsumer (line 497) | inline LogStreamConsumer LOG_FATAL(const Logger& logger)

FILE: unet/unet.cpp
  function ILayer (line 24) | ILayer* doubleConv(INetworkDefinition* network, std::map<std::string, We...
  function ILayer (line 42) | ILayer* down(INetworkDefinition* network, std::map<std::string, Weights>...
  function ILayer (line 51) | ILayer* up(INetworkDefinition* network, std::map<std::string, Weights>& ...
  function ILayer (line 97) | ILayer* outConv(INetworkDefinition* network, std::map<std::string, Weigh...
  function ICudaEngine (line 107) | ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, ...
  function APIToModel (line 157) | void APIToModel(unsigned int maxBatchSize, IHostMemory** model_stream, s...
  function doInference (line 174) | void doInference(IExecutionContext& context, float* input, float* output...
  function main (line 208) | int main(int argc, char** argv) {

FILE: vgg/logging.h
  function class (line 31) | class LogStreamConsumerBuffer : public std::stringbuf
  function class (line 106) | class LogStreamConsumerBase
  function std (line 160) | static std::string severityPrefix(Severity severity)
  function TestResult (line 213) | enum class TestResult
  function LogStreamConsumer (line 447) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
  function LogStreamConsumer (line 459) | inline LogStreamConsumer LOG_INFO(const Logger& logger)
  function LogStreamConsumer (line 471) | inline LogStreamConsumer LOG_WARN(const Logger& logger)
  function LogStreamConsumer (line 483) | inline LogStreamConsumer LOG_ERROR(const Logger& logger)
  function LogStreamConsumer (line 496) | inline LogStreamConsumer LOG_FATAL(const Logger& logger)

FILE: vgg/vgg11.cpp
  function loadWeights (line 37) | std::map<std::string, Weights> loadWeights(const std::string file)
  function ICudaEngine (line 77) | ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, ...
  function APIToModel (line 159) | void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream)
  function doInference (line 178) | void doInference(IExecutionContext& context, float* input, float* output...
  function main (line 212) | int main(int argc, char** argv)

FILE: vit/cuda_allocator.cc
  type CudaOutputAllocator::Allocation (line 15) | struct CudaOutputAllocator::Allocation {
  function getCudaRuntimeVersion (line 24) | static auto getCudaRuntimeVersion() -> int {
  function getCudaDriverVersion (line 32) | static auto getCudaDriverVersion() -> int {
  function OutputAllocKind (line 238) | OutputAllocKind CudaOutputAllocator::kind() const {

FILE: vit/cuda_allocator.h
  type class (line 11) | enum class
  type Allocation (line 34) | struct Allocation
  function cudaStream_t (line 38) | cudaStream_t stream_{}
  function OutputAllocKind (line 39) | OutputAllocKind kind_{OutputAllocKind::kCudaMallocManaged};

FILE: vit/gen_wts.py
  function read_imagenet_labels (line 9) | def read_imagenet_labels() -> dict[int, str]:

FILE: vit/logging.h
  function class (line 34) | class LogStreamConsumerBuffer : public std::stringbuf {
  function class (line 94) | class LogStreamConsumerBase {
  function std (line 140) | static std::string severityPrefix(Severity severity) {
  type TestInfo (line 188) | struct TestInfo
  function TestResult (line 197) | enum class TestResult : std::uint8_t {
  function LogStreamConsumer (line 460) | inline LogStreamConsumer LOG_FATAL(const Logger& logger) {

FILE: vit/profiler.h
  type Record (line 10) | struct Record {

FILE: vit/utils.h
  function cudaDeviceProp (line 30) | cudaDeviceProp prop{}

FILE: vit/vit.cc
  function bytesPerElement (line 33) | static auto bytesPerElement(DataType t) -> std::size_t {
  function convertWeightMapToHalf (line 55) | static void convertWeightMapToHalf(WeightMap& w) {
  type ViTParam (line 76) | struct ViTParam {
  function addGeLU (line 82) | static auto addGeLU(INetworkDefinition* net, ITensor& input) -> ILayer* {
  function addLinearNorm (line 118) | static auto addLinearNorm(INetworkDefinition* net, ITensor& input, ITens...
  function ViTLayer (line 129) | auto ViTLayer(INetworkDefinition* net, WeightMap& w, ITensor& input, con...
  function createEngine (line 253) | auto createEngine(int64_t N, IRuntime* runtime, IBuilder* builder, IBuil...
  function doInference (line 344) | std::vector<std::vector<float>> doInference(IExecutionContext& context, ...
  function APIToModel (line 445) | void APIToModel(int32_t N, IRuntime* runtime, IHostMemory** modelStream) {
  function main (line 465) | auto main(int argc, char** argv) -> int {

FILE: yolo11/gen_wts.py
  function parse_args (line 8) | def parse_args():

FILE: yolo11/include/calibrator.h
  function class (line 14) | class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {

FILE: yolo11/include/logging.h
  function class (line 32) | class LogStreamConsumerBuffer : public std::stringbuf {
  function class (line 91) | class LogStreamConsumerBase {
  function std (line 137) | static std::string severityPrefix(Severity severity) {
  function TestResult (line 191) | enum class TestResult {
  function LogStreamConsumer (line 405) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) {
  function LogStreamConsumer (line 416) | inline LogStreamConsumer LOG_INFO(const Logger& logger) {
  function LogStreamConsumer (line 427) | inline LogStreamConsumer LOG_WARN(const Logger& logger) {
  function LogStreamConsumer (line 438) | inline LogStreamConsumer LOG_ERROR(const Logger& logger) {
  function LogStreamConsumer (line 450) | inline LogStreamConsumer LOG_FATAL(const Logger& logger) {

FILE: yolo11/include/types.h
  function Detection (line 4) | struct alignas(float) Detection {
  type AffineMatrix (line 14) | struct AffineMatrix {

FILE: yolo11/include/utils.h
  function cv (line 6) | static inline cv::Mat preprocess_img(cv::Mat& img, int input_w, int inpu...
  function read_files_in_dir (line 28) | static inline int read_files_in_dir(const char* p_dir_name, std::vector<...
  function std (line 51) | static inline std::string trim_leading_whitespace(const std::string& str) {
  function read_labels (line 68) | static inline int read_labels(const std::string labels_filename, std::un...

FILE: yolo11/plugin/yololayer.h
  function namespace (line 9) | namespace nvinfer1 {

FILE: yolo11/src/block.cpp
  function loadWeights (line 10) | std::map<std::string, nvinfer1::Weights> loadWeights(const std::string f...

FILE: yolo11/src/model.cpp
  function get_width (line 9) | static int get_width(int x, float gw, int max_channels, int divisor = 8) {
  function get_depth (line 15) | static int get_depth(int x, float gd) {
  function calculateStrides (line 24) | void calculateStrides(nvinfer1::IElementWiseLayer* conv_layers[], int si...

FILE: yolo11/src/postprocess.cpp
  function get_rect (line 4) | cv::Rect get_rect(cv::Mat& img, float bbox[4]) {
  function get_rect_obb (line 36) | cv::Rect get_rect_obb(cv::Mat& img, float bbox[4]) {
  function get_rect_adapt_landmark (line 68) | cv::Rect get_rect_adapt_landmark(cv::Mat& img, float bbox[4], float lmk[...
  function iou (line 101) | static float iou(float lbox[4], float rbox[4]) {
  function cmp (line 117) | static bool cmp(const Detection& a, const Detection& b) {
  function nms (line 124) | void nms(std::vector<Detection>& res, float* output, float conf_thresh, ...
  function batch_nms (line 153) | void batch_nms(std::vector<std::vector<Detection>>& res_batch, float* ou...
  function process_decode_ptr_host (line 161) | void process_decode_ptr_host(std::vector<Detection>& res, const float* d...
  function batch_process (line 179) | void batch_process(std::vector<std::vector<Detection>>& res_batch, const...
  function draw_bbox (line 190) | void draw_bbox(std::vector<cv::Mat>& img_batch, std::vector<std::vector<...
  function draw_bbox_keypoints_line (line 203) | void draw_bbox_keypoints_line(std::vector<cv::Mat>& img_batch, std::vect...
  function scale_mask (line 237) | cv::Mat scale_mask(cv::Mat mask, cv::Mat img) {
  function draw_mask_bbox (line 258) | void draw_mask_bbox(cv::Mat& img, std::vector<Detection>& dets, std::vec...
  function process_decode_ptr_host_obb (line 303) | void process_decode_ptr_host_obb(std::vector<Detection>& res, const floa...
  function batch_process_obb (line 322) | void batch_process_obb(std::vector<std::vector<Detection>>& res_batch, c...
  function convariance_matrix (line 333) | std::tuple<float, float, float> convariance_matrix(Detection res) {
  function probiou (line 354) | static float probiou(const Detection& res1, const Detection& res2, float...
  function nms_obb (line 387) | void nms_obb(std::vector<Detection>& res, float* output, float conf_thre...
  function batch_nms_obb (line 417) | void batch_nms_obb(std::vector<std::vector<Detection>>& res_batch, float...
  function get_corner (line 425) | static std::vector<cv::Point> get_corner(cv::Mat& img, const Detection& ...
  function draw_bbox_obb (line 504) | void draw_bbox_obb(std::vector<cv::Mat>& img_batch, std::vector<std::vec...

FILE: yolo11/yolo11_cls.cpp
  function batch_preprocess (line 19) | void batch_preprocess(std::vector<cv::Mat>& imgs, float* output, int dst...
  function softmax (line 48) | std::vector<float> softmax(float* prob, int n) {
  function topk (line 63) | std::vector<int> topk(const std::vector<float>& vec, int k) {
  function read_classes (line 80) | std::vector<std::string> read_classes(std::string file_name) {
  function parse_args (line 95) | bool parse_args(int argc, char** argv, std::string& wts, std::string& en...
  function prepare_buffers (line 140) | void prepare_buffers(ICudaEngine* engine, float** gpu_input_buffer, floa...
  function infer (line 157) | void infer(IExecutionContext& context, cudaStream_t& stream, void** buff...
  function serialize_engine (line 167) | void serialize_engine(float& gd, float& gw, std::string& wts_name, std::...
  function deserialize_engine (line 191) | void deserialize_engine(std::string& engine_name, IRuntime** runtime, IC...
  function main (line 216) | int main(int argc, char** argv) {

FILE: yolo11/yolo11_cls_trt.py
  function get_img_path_batches (line 17) | def get_img_path_batches(batch_size, img_dir):
  class YoLo11TRT (line 35) | class YoLo11TRT(object):
    method __init__ (line 40) | def __init__(self, engine_file_path):
    method infer (line 90) | def infer(self, raw_image_generator):
    method destroy (line 137) | def destroy(self):
    method get_raw_image (line 141) | def get_raw_image(self, image_path_batch):
    method get_raw_image_zeros (line 148) | def get_raw_image_zeros(self, image_path_batch=None):
    method preprocess_cls_image (line 155) | def preprocess_cls_image(self, raw_bgr_image, dst_width=224, dst_heigh...
    method postprocess_cls (line 202) | def postprocess_cls(self, output_data):
  class inferThread (line 219) | class inferThread(threading.Thread):
    method __init__ (line 220) | def __init__(self, yolo11_wrapper, image_path_batch):
    method run (line 225) | def run(self):
  class warmUpThread (line 237) | class warmUpThread(threading.Thread):
    method __init__ (line 238) | def __init__(self, yolo11_wrapper):
    method run (line 242) | def run(self):

FILE: yolo11/yolo11_det.cpp
  function serialize_engine (line 16) | void serialize_engine(std::string& wts_name, std::string& engine_name, f...
  function deserialize_engine (line 37) | void deserialize_engine(std::string& engine_name, IRuntime** runtime, IC...
  function prepare_buffer (line 62) | void prepare_buffer(ICudaEngine* engine, float** input_buffer_device, fl...
  function infer (line 88) | void infer(IExecutionContext& context, cudaStream_t& stream, void** buff...
  function parse_args (line 115) | bool parse_args(int argc, char** argv, std::string& wts, std::string& en...
  function main (line 162) | int main(int argc, char** argv) {

FILE: yolo11/yolo11_det_trt.py
  function get_img_path_batches (line 25) | def get_img_path_batches(batch_size, img_dir):
  function plot_one_box (line 39) | def plot_one_box(x, img, color=None, label=None, line_thickness=None):
  class YoLo11TRT (line 76) | class YoLo11TRT(object):
    method __init__ (line 81) | def __init__(self, engine_file_path):
    method infer (line 130) | def infer(self, raw_image_generator):
    method destroy (line 189) | def destroy(self):
    method get_raw_image (line 193) | def get_raw_image(self, image_path_batch):
    method get_raw_image_zeros (line 200) | def get_raw_image_zeros(self, image_path_batch=None):
    method preprocess_image (line 207) | def preprocess_image(self, raw_bgr_image):
    method xywh2xyxy (line 255) | def xywh2xyxy(self, origin_h, origin_w, x):
    method post_process (line 283) | def post_process(self, output, origin_h, origin_w):
    method bbox_iou (line 308) | def bbox_iou(self, box1, box2, x1y1x2y2=True):
    method non_max_suppression (line 345) | def non_max_suppression(self, prediction, origin_h, origin_w, conf_thr...
  class inferThread (line 384) | class inferThread(threading.Thread):
    method __init__ (line 385) | def __init__(self, yolo11_wrapper, image_path_batch):
    method run (line 390) | def run(self):
  class warmUpThread (line 400) | class warmUpThread(threading.Thread):
    method __init__ (line 401) | def __init__(self, yolo11_wrapper):
    method run (line 405) | def run(self):

FILE: yolo11/yolo11_obb.cpp
  function serialize_engine (line 16) | void serialize_engine(std::string& wts_name, std::string& engine_name, s...
  function deserialize_engine (line 37) | void deserialize_engine(std::string& engine_name, IRuntime** runtime, IC...
  function prepare_buffer (line 62) | void prepare_buffer(ICudaEngine* engine, float** input_buffer_device, fl...
  function infer (line 90) | void infer(IExecutionContext& context, cudaStream_t& stream, void** buff...
  function parse_args (line 117) | bool parse_args(int argc, char** argv, std::string& wts, std::string& en...
  function main (line 163) | int main(int argc, char** argv) {

FILE: yolo11/yolo11_obb_trt.py
  class Detection (line 28) | class Detection:
    method __init__ (line 29) | def __init__(self, bbox, score, class_id, angle):
  function get_img_path_batches (line 36) | def get_img_path_batches(batch_size, img_dir):
  function get_corner (line 50) | def get_corner(img, box: Detection):
  function get_rect_obb (line 122) | def get_rect_obb(img, bbox):
  function plot_one_box (line 162) | def plot_one_box(x, img, color=None, label=None, line_thickness=None):
  class YoLo11TRT (line 203) | class YoLo11TRT(object):
    method __init__ (line 208) | def __init__(self, engine_file_path):
    method infer (line 259) | def infer(self, raw_image_generator):
    method destroy (line 322) | def destroy(self):
    method get_raw_image (line 326) | def get_raw_image(self, image_path_batch):
    method get_raw_image_zeros (line 333) | def get_raw_image_zeros(self, image_path_batch=None):
    method preprocess_image (line 340) | def preprocess_image(self, raw_bgr_image):
    method xywh2xyxy (line 388) | def xywh2xyxy(self, origin_h, origin_w, x):
    method covariance_matrix (line 416) | def covariance_matrix(self, res: Detection):
    method probiou (line 445) | def probiou(self, box1: Detection, box2: Detection, eps=1e-7):
    method post_process (line 479) | def post_process(self, output, origin_h, origin_w):
  class inferThread (line 534) | class inferThread(threading.Thread):
    method __init__ (line 535) | def __init__(self, yolo11_wrapper, image_path_batch):
    method run (line 540) | def run(self):
  class warmUpThread (line 550) | class warmUpThread(threading.Thread):
    method __init__ (line 551) | def __init__(self, yolo11_wrapper):
    method run (line 555) | def run(self):

FILE: yolo11/yolo11_pose.cpp
  function serialize_engine (line 16) | void serialize_engine(std::string& wts_name, std::string& engine_name, s...
  function deserialize_engine (line 37) | void deserialize_engine(std::string& engine_name, IRuntime** runtime, IC...
  function prepare_buffer (line 62) | void prepare_buffer(ICudaEngine* engine, float** input_buffer_device, fl...
  function infer (line 88) | void infer(IExecutionContext& context, cudaStream_t& stream, void** buff...
  function parse_args (line 115) | bool parse_args(int argc, char** argv, std::string& wts, std::string& en...
  function main (line 161) | int main(int argc, char** argv) {

FILE: yolo11/yolo11_pose_trt.py
  function get_img_path_batches (line 31) | def get_img_path_batches(batch_size, img_dir):
  function plot_one_box (line 45) | def plot_one_box(x, img, color=None, label=None, line_thickness=None):
  class YoLo11TRT (line 82) | class YoLo11TRT(object):
    method __init__ (line 87) | def __init__(self, engine_file_path):
    method infer (line 135) | def infer(self, raw_image_generator):
    method destroy (line 214) | def destroy(self):
    method get_raw_image (line 218) | def get_raw_image(self, image_path_batch):
    method get_raw_image_zeros (line 225) | def get_raw_image_zeros(self, image_path_batch=None):
    method preprocess_image (line 232) | def preprocess_image(self, raw_bgr_image):
    method xywh2xyxy_with_keypoints (line 280) | def xywh2xyxy_with_keypoints(self, origin_h, origin_w, boxes, keypoints):
    method post_process (line 317) | def post_process(self, output, origin_h, origin_w):
    method bbox_iou (line 353) | def bbox_iou(self, box1, box2, x1y1x2y2=True):
    method non_max_suppression (line 390) | def non_max_suppression(self, prediction, origin_h, origin_w, conf_thr...
  class inferThread (line 435) | class inferThread(threading.Thread):
    method __init__ (line 436) | def __init__(self, yolo11_wrapper, image_path_batch):
    method run (line 441) | def run(self):
  class warmUpThread (line 452) | class warmUpThread(threading.Thread):
    method __init__ (line 453) | def __init__(self, yolo11_wrapper):
    method run (line 457) | def run(self):

FILE: yolo11/yolo11_seg.cpp
  function get_downscale_rect (line 17) | static cv::Rect get_downscale_rect(float bbox[4], float scale) {
  function process_mask (line 36) | std::vector<cv::Mat> process_mask(const float* proto, int proto_size, st...
  function serialize_engine (line 60) | void serialize_engine(std::string& wts_name, std::string& engine_name, s...
  function deserialize_engine (line 81) | void deserialize_engine(std::string& engine_name, IRuntime** runtime, IC...
  function prepare_buffer (line 106) | void prepare_buffer(ICudaEngine* engine, float** input_buffer_device, fl...
  function infer (line 138) | void infer(IExecutionContext& context, cudaStream_t& stream, void** buff...
  function parse_args (line 172) | bool parse_args(int argc, char** argv, std::string& wts, std::string& en...
  function main (line 219) | int main(int argc, char** argv) {

FILE: yolo11/yolo11_seg_trt.py
  function get_img_path_batches (line 25) | def get_img_path_batches(batch_size, img_dir):
  function plot_one_box (line 39) | def plot_one_box(x, img, color=None, label=None, line_thickness=None):
  class YoLo11TRT (line 76) | class YoLo11TRT(object):
    method __init__ (line 81) | def __init__(self, engine_file_path):
    method infer (line 140) | def infer(self, raw_image_generator):
    method destroy (line 211) | def destroy(self):
    method get_raw_image (line 215) | def get_raw_image(self, image_path_batch):
    method get_raw_image_zeros (line 222) | def get_raw_image_zeros(self, image_path_batch=None):
    method preprocess_image (line 229) | def preprocess_image(self, raw_bgr_image):
    method xywh2xyxy (line 277) | def xywh2xyxy(self, origin_h, origin_w, x):
    method post_process (line 305) | def post_process(self, output, origin_h, origin_w):
    method bbox_iou (line 330) | def bbox_iou(self, box1, box2, x1y1x2y2=True):
    method non_max_suppression (line 367) | def non_max_suppression(self, prediction, origin_h, origin_w, conf_thr...
    method sigmoid (line 405) | def sigmoid(self, x):
    method scale_mask (line 408) | def scale_mask(self, mask, ih, iw):
    method process_mask (line 426) | def process_mask(self, output_proto_mask, result_proto_coef, result_bo...
    method draw_mask (line 462) | def draw_mask(self, masks, colors_, im_src, alpha=0.5):
  class inferThread (line 484) | class inferThread(threading.Thread):
    method __init__ (line 485) | def __init__(self, yolo11_wrapper, image_path_batch):
    method run (line 490) | def run(self):
  class warmUpThread (line 500) | class warmUpThread(threading.Thread):
    method __init__ (line 501) | def __init__(self, yolo11_wrapper):
    method run (line 505) | def run(self):
  class Colors (line 510) | class Colors:
    method __init__ (line 511) | def __init__(self):
    method __call__ (line 519) | def __call__(self, i, bgr=False):
    method hex2rgb (line 524) | def hex2rgb(h):  # rgb order (PIL)

FILE: yolo11_tripy/classify.py
  function load_image (line 13) | def load_image(path):
  function preprocess (line 17) | def preprocess(image):
  function main (line 46) | def main():

FILE: yolo11_tripy/compile_classifier.py
  function get_model_config (line 14) | def get_model_config(model_variant):
  function download_weights (line 32) | def download_weights(model_variant, directory):
  function load_weights (line 61) | def load_weights(weights_path, dtype):
  function main (line 79) | def main():

FILE: yolo11_tripy/model/block.py
  class ConvBnSilu (line 4) | class ConvBnSilu(tp.Module):
    method __init__ (line 5) | def __init__(self, in_channels, out_channels, kernel_dims, stride, dty...
    method forward (line 18) | def forward(self, x):
  class Bottleneck (line 25) | class Bottleneck(tp.Module):
    method __init__ (line 26) | def __init__(
    method forward (line 49) | def forward(self, x):
  class C3k (line 57) | class C3k(tp.Module):
    method __init__ (line 58) | def __init__(
    method forward (line 110) | def forward(self, x):
  class C3K2 (line 120) | class C3K2(tp.Module):
    method __init__ (line 121) | def __init__(
    method forward (line 175) | def forward(self, x):
  class ConvBn (line 189) | class ConvBn(tp.Module):
    method __init__ (line 190) | def __init__(self, in_channels, out_channels, kernel_dims, stride, dty...
    method forward (line 204) | def forward(self, x):
  class Attention (line 210) | class Attention(tp.Module):
    method __init__ (line 211) | def __init__(self, dim, num_heads, attn_ratio, dtype):
    method forward (line 226) | def forward(self, x):
  class PSABlock (line 254) | class PSABlock(tp.Module):
    method __init__ (line 255) | def __init__(self, dim, attn_ratio, num_heads, shortcut, dtype):
    method forward (line 266) | def forward(self, x):
  class C2PSA (line 282) | class C2PSA(tp.Module):
    method __init__ (line 283) | def __init__(self, input_channels, output_channels, num_layers, expans...
    method forward (line 304) | def forward(self, x):

FILE: yolo11_tripy/model/model.py
  function get_width (line 10) | def get_width(w, gw, max_channels, divisor=8):
  function get_depth (line 14) | def get_depth(d, gd):
  class Yolo11Head (line 25) | class Yolo11Head(tp.Module):
    method __init__ (line 26) | def __init__(self, input_channels, dtype):
    method forward (line 31) | def forward(self, x):
  class Yolo11Cls (line 39) | class Yolo11Cls(tp.Module):
    method __init__ (line 40) | def __init__(self, model_variant, gd, gw, max_channels, dtype=tp.float...
    method forward (line 119) | def forward(self, x):

FILE: yolo26/gen_wts.py
  function parse_args (line 8) | def parse_args():

FILE: yolo26/include/logging.h
  function class (line 32) | class LogStreamConsumerBuffer : public std::stringbuf {
  function class (line 91) | class LogStreamConsumerBase {
  function std (line 137) | static std::string severityPrefix(Severity severity) {
  function TestResult (line 191) | enum class TestResult {
  function LogStreamConsumer (line 405) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) {
  function LogStreamConsumer (line 416) | inline LogStreamConsumer LOG_INFO(const Logger& logger) {
  function LogStreamConsumer (line 427) | inline LogStreamConsumer LOG_WARN(const Logger& logger) {
  function LogStreamConsumer (line 438) | inline LogStreamConsumer LOG_ERROR(const Logger& logger) {
  function LogStreamConsumer (line 450) | inline LogStreamConsumer LOG_FATAL(const Logger& logger) {

FILE: yolo26/include/types.h
  function Detection (line 4) | struct alignas(float) Detection {
  type AffineMatrix (line 14) | struct AffineMatrix {

FILE: yolo26/include/utils.h
  function cv (line 6) | static inline cv::Mat preprocess_img(cv::Mat& img, int input_w, int inpu...
  function read_files_in_dir (line 28) | static inline int read_files_in_dir(const char* p_dir_name, std::vector<...
  function std (line 50) | inline std::vector<std::string> read_classes(std::string file_name) {
  function std (line 67) | static inline std::string trim_leading_whitespace(const std::string& str) {
  function read_labels (line 84) | static inline int read_labels(const std::string labels_filename, std::un...
  function parse_args (line 103) | static inline bool parse_args(int argc, char** argv, std::string& wts, s...

FILE: yolo26/plugin/yololayer.h
  function namespace (line 7) | namespace nvinfer1 {

FILE: yolo26/src/block.cpp
  function loadWeights (line 10) | std::map<std::string, nvinfer1::Weights> loadWeights(const std::string f...

FILE: yolo26/src/model.cpp
  function get_width (line 9) | static int get_width(int x, float gw, int max_channels, int divisor = 8) {
  function get_depth (line 15) | static int get_depth(int x, float gd) {
  function calculateStrides (line 24) | void calculateStrides(nvinfer1::IElementWiseLayer* conv_layers[], int si...

FILE: yolo26/src/postprocess.cpp
  function get_rect (line 5) | cv::Rect get_rect(cv::Mat& img, float bbox[4]) {
  function get_rect_obb (line 37) | cv::Rect get_rect_obb(cv::Mat& img, float bbox[4]) {
  function get_rect_adapt_landmark (line 69) | cv::Rect get_rect_adapt_landmark(cv::Mat& img, float bbox[4], float lmk[...
  function iou (line 102) | static float iou(float lbox[4], float rbox[4]) {
  function cmp (line 118) | static bool cmp(const Detection& a, const Detection& b) {
  function decode (line 125) | void decode(std::vector<Detection>& res, float* output) {
  function batch_decode (line 136) | void batch_decode(std::vector<std::vector<Detection>>& res_batch, float*...
  function draw_bbox (line 143) | void draw_bbox(std::vector<cv::Mat>& img_batch, std::vector<std::vector<...
  function draw_bbox_keypoints_line (line 156) | void draw_bbox_keypoints_line(std::vector<cv::Mat>& img_batch, std::vect...
  function scale_mask (line 190) | cv::Mat scale_mask(cv::Mat mask, cv::Mat img) {
  function draw_mask_bbox (line 211) | void draw_mask_bbox(cv::Mat& img, std::vector<Detection>& dets, std::vec...
  function convariance_matrix (line 256) | std::tuple<float, float, float> convariance_matrix(Detection res) {
  function probiou (line 277) | static float probiou(const Detection& res1, const Detection& res2, float...
  function decode_obb (line 310) | void decode_obb(std::vector<Detection>& res, float* output) {
  function batch_decode_obb (line 321) | void batch_decode_obb(std::vector<std::vector<Detection>>& res_batch, fl...
  function get_corner (line 328) | static std::vector<cv::Point> get_corner(cv::Mat& img, const Detection& ...
  function draw_bbox_obb (line 407) | void draw_bbox_obb(std::vector<cv::Mat>& img_batch, std::vector<std::vec...

FILE: yolo26/yolo26_cls.cpp
  function batch_preprocess (line 19) | void batch_preprocess(std::vector<cv::Mat>& imgs, float* output, int dst...
  function serialize_engine (line 48) | void serialize_engine(const std::string& wts_name, std::string& engine_n...
  function deserialize_engine (line 67) | void deserialize_engine(std::string& engine_name, IRuntime** runtime, IC...
  function prepare_buffer (line 92) | void prepare_buffer(ICudaEngine* engine, float** input_buffer_device, fl...
  function infer (line 110) | void infer(IExecutionContext& context, cudaStream_t& stream, void** buff...
  function topk (line 120) | std::vector<int> topk(const std::vector<float>& vec, int k) {
  function main (line 137) | int main(int argc, char** argv) {

FILE: yolo26/yolo26_det.cpp
  function serialize_engine (line 18) | void serialize_engine(const std::string& wts_name, std::string& engine_n...
  function deserialize_engine (line 37) | void deserialize_engine(std::string& engine_name, IRuntime** runtime, IC...
  function prepare_buffer (line 62) | void prepare_buffer(ICudaEngine* engine, float** input_buffer_device, fl...
  function infer (line 79) | void infer(IExecutionContext& context, cudaStream_t& stream, void** buff...
  function main (line 95) | int main(int argc, char** argv) {

FILE: yolo26/yolo26_obb.cpp
  function serialize_engine (line 17) | void serialize_engine(const std::string& wts_name, std::string& engine_n...
  function deserialize_engine (line 36) | void deserialize_engine(std::string& engine_name, IRuntime** runtime, IC...
  function prepare_buffer (line 61) | void prepare_buffer(ICudaEngine* engine, float** input_buffer_device, fl...
  function infer (line 78) | void infer(IExecutionContext& context, cudaStream_t& stream, void** buff...
  function main (line 93) | int main(int argc, char** argv) {

FILE: yolop/common.hpp
  function get_rect (line 13) | cv::Rect get_rect(cv::Mat& img, float bbox[4]) {
  function iou (line 39) | float iou(float lbox[4], float rbox[4]) {
  function cmp (line 54) | bool cmp(const Yolo::Detection& a, const Yolo::Detection& b) {
  function nms (line 58) | void nms(std::vector<Yolo::Detection>& res, float *output, float conf_th...
  function loadWeights (line 87) | std::map<std::string, Weights> loadWeights(const std::string file) {
  function IScaleLayer (line 125) | IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::s...
  function ILayer (line 158) | ILayer* convBlock(INetworkDefinition *network, std::map<std::string, Wei...
  function ILayer (line 184) | ILayer* focus(INetworkDefinition *network, std::map<std::string, Weights...
  function ILayer (line 195) | ILayer* bottleneck(INetworkDefinition *network, std::map<std::string, We...
  function ILayer (line 205) | ILayer* bottleneckCSP(INetworkDefinition *network, std::map<std::string,...
  function ILayer (line 228) | ILayer* C3(INetworkDefinition *network, std::map<std::string, Weights>& ...
  function ILayer (line 245) | ILayer* SPP(INetworkDefinition *network, std::map<std::string, Weights>&...
  function ILayer (line 266) | ILayer* preprocess_layer(INetworkDefinition *network, std::map<std::stri...
  function getAnchors (line 292) | std::vector<float> getAnchors(std::map<std::string, Weights>& weightMap)
  function IPluginV2Layer (line 318) | IPluginV2Layer* addYoLoLayer(INetworkDefinition *network, std::map<std::...

FILE: yolop/logging.h
  function class (line 22) | class Logger : public nvinfer1::ILogger

FILE: yolop/utils.h
  function cv (line 11) | static inline cv::Mat preprocess_img(cv::Mat& img, int input_w, int inpu...
  function read_files_in_dir (line 38) | static inline int read_files_in_dir(const char *p_dir_name, std::vector<...

FILE: yolop/yololayer.h
  function namespace (line 9) | namespace Yolo
  function class (line 37) | class YoloLayerPlugin : public IPluginV2IOExt

FILE: yolop/yolop.cpp
  function main (line 4) | int main(int argc, char** argv) {

FILE: yolop/yolop.hpp
  function ICudaEngine (line 27) | ICudaEngine* build_engine(unsigned int maxBatchSize, IBuilder* builder, ...
  function APIToModel (line 187) | void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream, st...
  function doInference (line 205) | void doInference(IExecutionContext& context, cudaStream_t& stream, void ...
  function doInferenceCpu (line 215) | void doInferenceCpu(IExecutionContext& context, cudaStream_t& stream, vo...
  function parse_args (line 225) | bool parse_args(int argc, char** argv, std::string& wts, std::string& en...

FILE: yolop/yolop_trt.py
  function get_img_path_batches (line 21) | def get_img_path_batches(batch_size, img_dir):
  function plot_one_box (line 34) | def plot_one_box(x, img, color=None, label=None, line_thickness=None):
  class YolopTRT (line 50) | class YolopTRT(object):
    method __init__ (line 55) | def __init__(self, engine_file_path):
    method infer (line 108) | def infer(self, raw_image_generator):
    method destroy (line 180) | def destroy(self):
    method get_raw_image (line 184) | def get_raw_image(self, image_path_batch):
    method get_raw_image_zeros (line 188) | def get_raw_image_zeros(self, image_path_batch=None):
    method preprocess_image (line 192) | def preprocess_image(self, raw_bgr_image):
    method xywh2xyxy (line 229) | def xywh2xyxy(self, origin_h, origin_w, x):
    method post_process (line 257) | def post_process(self, output, origin_h, origin_w):
    method bbox_iou (line 269) | def bbox_iou(self, box1, box2, x1y1x2y2=True):
    method non_max_suppression (line 306) | def non_max_suppression(self, prediction, origin_h, origin_w, conf_thr...

FILE: yolov10/gen_wts.py
  function parse_args (line 14) | def parse_args():

FILE: yolov10/include/calibrator.h
  function class (line 14) | class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {

FILE: yolov10/include/logging.h
  function class (line 32) | class LogStreamConsumerBuffer : public std::stringbuf {
  function class (line 91) | class LogStreamConsumerBase {
  function std (line 137) | static std::string severityPrefix(Severity severity) {
  function TestResult (line 191) | enum class TestResult {
  function LogStreamConsumer (line 405) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) {
  function LogStreamConsumer (line 416) | inline LogStreamConsumer LOG_INFO(const Logger& logger) {
  function LogStreamConsumer (line 427) | inline LogStreamConsumer LOG_WARN(const Logger& logger) {
  function LogStreamConsumer (line 438) | inline LogStreamConsumer LOG_ERROR(const Logger& logger) {
  function LogStreamConsumer (line 450) | inline LogStreamConsumer LOG_FATAL(const Logger& logger) {

FILE: yolov10/include/types.h
  function Detection (line 4) | struct alignas(float) Detection {
  type AffineMatrix (line 11) | struct AffineMatrix {

FILE: yolov10/include/utils.h
  function cv (line 6) | static inline cv::Mat preprocess_img(cv::Mat& img, int input_w, int inpu...
  function read_files_in_dir (line 28) | static inline int read_files_in_dir(const char* p_dir_name, std::vector<...
  function std (line 51) | static inline std::string trim_leading_whitespace(const std::string& str) {
  function read_labels (line 68) | static inline int read_labels(const std::string labels_filename, std::un...

FILE: yolov10/plugin/yololayer.h
  function namespace (line 8) | namespace nvinfer1 {

FILE: yolov10/src/block.cpp
  function loadWeights (line 9) | std::map<std::string, nvinfer1::Weights> loadWeights(const std::string f...

FILE: yolov10/src/model.cpp
  function get_width (line 9) | static int get_width(int x, float gw, int max_channels, int divisor = 8) {
  function get_depth (line 15) | static int get_depth(int x, float gd) {
  function calculateStrides (line 24) | void calculateStrides(nvinfer1::ILayer* conv_layers[], int size, int ref...

FILE: yolov10/src/postprocess.cpp
  function get_rect (line 3) | cv::Rect get_rect(cv::Mat& img, float bbox[4]) {
  function get_topk (line 35) | void get_topk(std::vector<Detection>& res, float* output, float conf_thr...
  function batch_topk (line 46) | void batch_topk(std::vector<std::vector<Detection>>& res_batch, float* o...
  function draw_bbox (line 54) | void draw_bbox(std::vector<cv::Mat>& img_batch, std::vector<std::vector<...

FILE: yolov10/yolov10_det.cpp
  function serialize_engine (line 15) | void serialize_engine(std::string& wts_name, std::string& engine_name, s...
  function deserialize_engine (line 49) | void deserialize_engine(std::string& engine_name, IRuntime** runtime, IC...
  function prepare_buffer (line 74) | void prepare_buffer(ICudaEngine* engine, float** input_buffer_device, fl...
  function infer (line 90) | void infer(IExecutionContext& context, cudaStream_t& stream, void** buff...
  function parse_args (line 104) | bool parse_args(int argc, char** argv, std::string& wts, std::string& en...
  function main (line 155) | int main(int argc, char** argv) {

FILE: yolov10/yolov10_det_trt.py
  function get_img_path_batches (line 25) | def get_img_path_batches(batch_size, img_dir):
  function plot_one_box (line 39) | def plot_one_box(x, img, color=None, label=None, line_thickness=None):
  class Yolov10TRT (line 76) | class Yolov10TRT(object):
    method __init__ (line 81) | def __init__(self, engine_file_path):
    method infer (line 134) | def infer(self, raw_image_generator):
    method destroy (line 193) | def destroy(self):
    method get_raw_image (line 197) | def get_raw_image(self, image_path_batch):
    method get_raw_image_zeros (line 204) | def get_raw_image_zeros(self, image_path_batch=None):
    method preprocess_image (line 211) | def preprocess_image(self, raw_bgr_image):
    method xywh2xyxy (line 259) | def xywh2xyxy(self, origin_h, origin_w, x):
    method post_process (line 287) | def post_process(self, output, origin_h, origin_w):
    method bbox_iou (line 312) | def bbox_iou(self, box1, box2, x1y1x2y2=True):
    method non_max_suppression (line 349) | def non_max_suppression(self, prediction, origin_h, origin_w, conf_thr...
  class inferThread (line 388) | class inferThread(threading.Thread):
    method __init__ (line 389) | def __init__(self, yolov8_wrapper, image_path_batch):
    method run (line 394) | def run(self):
  class warmUpThread (line 404) | class warmUpThread(threading.Thread):
    method __init__ (line 405) | def __init__(self, yolov8_wrapper):
    method run (line 409) | def run(self):

FILE: yolov12-tubro/gen_wts.py
  function parse_args (line 8) | def parse_args():

FILE: yolov12-tubro/include/calibrator.h
  function class (line 14) | class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {

FILE: yolov12-tubro/include/logging.h
  function class (line 32) | class LogStreamConsumerBuffer : public std::stringbuf {
  function class (line 91) | class LogStreamConsumerBase {
  function std (line 137) | static std::string severityPrefix(Severity severity) {
  function TestResult (line 191) | enum class TestResult {
  function LogStreamConsumer (line 405) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) {
  function LogStreamConsumer (line 416) | inline LogStreamConsumer LOG_INFO(const Logger& logger) {
  function LogStreamConsumer (line 427) | inline LogStreamConsumer LOG_WARN(const Logger& logger) {
  function LogStreamConsumer (line 438) | inline LogStreamConsumer LOG_ERROR(const Logger& logger) {
  function LogStreamConsumer (line 450) | inline LogStreamConsumer LOG_FATAL(const Logger& logger) {

FILE: yolov12-tubro/include/types.h
  function Detection (line 4) | struct alignas(float) Detection {
  type AffineMatrix (line 14) | struct AffineMatrix {

FILE: yolov12-tubro/include/utils.h
  function cv (line 6) | static inline cv::Mat preprocess_img(cv::Mat& img, int input_w, int inpu...
  function read_files_in_dir (line 28) | static inline int read_files_in_dir(const char* p_dir_name, std::vector<...
  function std (line 51) | static inline std::string trim_leading_whitespace(const std::string& str) {
  function read_labels (line 68) | static inline int read_labels(const std::string labels_filename, std::un...

FILE: yolov12-tubro/plugin/yololayer.h
  function namespace (line 9) | namespace nvinfer1 {

FILE: yolov12-tubro/src/block.cpp
  function loadWeights (line 10) | std::map<std::string, nvinfer1::Weights> loadWeights(const std::string f...

FILE: yolov12-tubro/src/model.cpp
  function get_width (line 9) | static int get_width(int x, float gw, int max_channels, int divisor = 8) {
  function get_depth (line 15) | static int get_depth(int x, float gd) {
  function calculateStrides (line 115) | void calculateStrides(nvinfer1::IElementWiseLayer* conv_layers[], int si...
  function calculateStrides (line 124) | void calculateStrides(nvinfer1::ILayer* conv_layers[], int size, int ref...

FILE: yolov12-tubro/src/postprocess.cpp
  function get_rect (line 4) | cv::Rect get_rect(cv::Mat& img, float bbox[4]) {
  function get_rect_obb (line 36) | cv::Rect get_rect_obb(cv::Mat& img, float bbox[4]) {
  function get_rect_adapt_landmark (line 68) | cv::Rect get_rect_adapt_landmark(cv::Mat& img, float bbox[4], float lmk[...
  function iou (line 101) | static float iou(float lbox[4], float rbox[4]) {
  function cmp (line 117) | static bool cmp(const Detection& a, const Detection& b) {
  function nms (line 124) | void nms(std::vector<Detection>& res, float* output, float conf_thresh, ...
  function batch_nms (line 153) | void batch_nms(std::vector<std::vector<Detection>>& res_batch, float* ou...
  function process_decode_ptr_host (line 161) | void process_decode_ptr_host(std::vector<Detection>& res, const float* d...
  function batch_process (line 179) | void batch_process(std::vector<std::vector<Detection>>& res_batch, const...
  function draw_bbox (line 190) | void draw_bbox(std::vector<cv::Mat>& img_batch, std::vector<std::vector<...
  function draw_bbox_keypoints_line (line 203) | void draw_bbox_keypoints_line(std::vector<cv::Mat>& img_batch, std::vect...
  function scale_mask (line 237) | cv::Mat scale_mask(cv::Mat mask, cv::Mat img) {
  function draw_mask_bbox (line 258) | void draw_mask_bbox(cv::Mat& img, std::vector<Detection>& dets, std::vec...
  function process_decode_ptr_host_obb (line 303) | void process_decode_ptr_host_obb(std::vector<Detection>& res, const floa...
  function batch_process_obb (line 322) | void batch_process_obb(std::vector<std::vector<Detection>>& res_batch, c...
  function convariance_matrix (line 333) | std::tuple<float, float, float> convariance_matrix(Detection res) {
  function probiou (line 354) | static float probiou(const Detection& res1, const Detection& res2, float...
  function nms_obb (line 387) | void nms_obb(std::vector<Detection>& res, float* output, float conf_thre...
  function batch_nms_obb (line 417) | void batch_nms_obb(std::vector<std::vector<Detection>>& res_batch, float...
  function get_corner (line 425) | static std::vector<cv::Point> get_corner(cv::Mat& img, const Detection& ...
  function draw_bbox_obb (line 504) | void draw_bbox_obb(std::vector<cv::Mat>& img_batch, std::vector<std::vec...

FILE: yolov12-tubro/yolov12_cls.cpp
  function batch_preprocess (line 19) | void batch_preprocess(std::vector<cv::Mat>& imgs, float* output, int dst...
  function softmax (line 48) | std::vector<float> softmax(float* prob, int n) {
  function topk (line 63) | std::vector<int> topk(const std::vector<float>& vec, int k) {
  function read_classes (line 80) | std::vector<std::string> read_classes(std::string file_name) {
  function parse_args (line 95) | bool parse_args(int argc, char** argv, std::string& wts, std::string& en...
  function prepare_buffers (line 140) | void prepare_buffers(ICudaEngine* engine, float** gpu_input_buffer, floa...
  function infer (line 157) | void infer(IExecutionContext& context, cudaStream_t& stream, void** buff...
  function serialize_engine (line 167) | void serialize_engine(float& gd, float& gw, std::string& wts_name, std::...
  function deserialize_engine (line 190) | void deserialize_engine(std::string& engine_name, IRuntime** runtime, IC...
  function main (line 215) | int main(int argc, char** argv) {

FILE: yolov12-tubro/yolov12_cls_trt.py
  function get_img_path_batches (line 17) | def get_img_path_batches(batch_size, img_dir):
  class YoLov12TRT (line 31) | class YoLov12TRT(object):
    method __init__ (line 36) | def __init__(self, engine_file_path):
    method infer (line 87) | def infer(self, raw_image_generator):
    method destroy (line 134) | def destroy(self):
    method get_raw_image (line 138) | def get_raw_image(self, image_path_batch):
    method get_raw_image_zeros (line 145) | def get_raw_image_zeros(self, image_path_batch=None):
    method preprocess_cls_image (line 152) | def preprocess_cls_image(self, raw_bgr_image, dst_width=224, dst_heigh...
    method postprocess_cls (line 199) | def postprocess_cls(self, output_data):
  class inferThread (line 216) | class inferThread(threading.Thread):
    method __init__ (line 217) | def __init__(self, yolov12_wrapper, image_path_batch):
    method run (line 222) | def run(self):
  class warmUpThread (line 234) | class warmUpThread(threading.Thread):
    method __init__ (line 235) | def __init__(self, yolov12_wrapper):
    method run (line 239) | def run(self):

FILE: yolov12-tubro/yolov12_det.cpp
  function serialize_engine (line 16) | void serialize_engine(std::string& wts_name, std::string& engine_name, f...
  function deserialize_engine (line 37) | void deserialize_engine(std::string& engine_name, IRuntime** runtime, IC...
  function prepare_buffer (line 62) | void prepare_buffer(ICudaEngine* engine, float** input_buffer_device, fl...
  function infer (line 88) | void infer(IExecutionContext& context, cudaStream_t& stream, void** buff...
  function parse_args (line 115) | bool parse_args(int argc, char** argv, std::string& wts, std::string& en...
  function main (line 162) | int main(int argc, char** argv) {

FILE: yolov12-tubro/yolov12_det_trt.py
  function get_img_path_batches (line 25) | def get_img_path_batches(batch_size, img_dir):
  function plot_one_box (line 39) | def plot_one_box(x, img, color=None, label=None, line_thickness=None):
  class YoLo12TRT (line 76) | class YoLo12TRT(object):
    method __init__ (line 81) | def __init__(self, engine_file_path):
    method infer (line 130) | def infer(self, raw_image_generator):
    method destroy (line 189) | def destroy(self):
    method get_raw_image (line 193) | def get_raw_image(self, image_path_batch):
    method get_raw_image_zeros (line 200) | def get_raw_image_zeros(self, image_path_batch=None):
    method preprocess_image (line 207) | def preprocess_image(self, raw_bgr_image):
    method xywh2xyxy (line 255) | def xywh2xyxy(self, origin_h, origin_w, x):
    method post_process (line 283) | def post_process(self, output, origin_h, origin_w):
    method bbox_iou (line 309) | def bbox_iou(self, box1, box2, x1y1x2y2=True):
    method non_max_suppression (line 346) | def non_max_suppression(self, prediction, origin_h, origin_w, conf_thr...
  class inferThread (line 385) | class inferThread(threading.Thread):
    method __init__ (line 386) | def __init__(self, yolo11_wrapper, image_path_batch):
    method run (line 391) | def run(self):
  class warmUpThread (line 401) | class warmUpThread(threading.Thread):
    method __init__ (line 402) | def __init__(self, yolo11_wrapper):
    method run (line 406) | def run(self):

FILE: yolov12-tubro/yolov12_seg.cpp
  function get_downscale_rect (line 17) | static cv::Rect get_downscale_rect(float bbox[4], float scale) {
  function process_mask (line 36) | std::vector<cv::Mat> process_mask(const float* proto, int proto_size, st...
  function serialize_engine (line 60) | void serialize_engine(std::string& wts_name, std::string& engine_name, s...
  function deserialize_engine (line 81) | void deserialize_engine(std::string& engine_name, IRuntime** runtime, IC...
  function prepare_buffer (line 106) | void prepare_buffer(ICudaEngine* engine, float** input_buffer_device, fl...
  function infer (line 138) | void infer(IExecutionContext& context, cudaStream_t& stream, void** buff...
  function parse_args (line 172) | bool parse_args(int argc, char** argv, std::string& wts, std::string& en...
  function main (line 219) | int main(int argc, char** argv) {

FILE: yolov12-tubro/yolov12_seg_trt.py
  function get_img_path_batches (line 25) | def get_img_path_batches(batch_size, img_dir):
  function plot_one_box (line 39) | def plot_one_box(x, img, color=None, label=None, line_thickness=None):
  class YoLo12TRT (line 76) | class YoLo12TRT(object):
    method __init__ (line 81) | def __init__(self, engine_file_path):
    method infer (line 140) | def infer(self, raw_image_generator):
    method destroy (line 211) | def destroy(self):
    method get_raw_image (line 215) | def get_raw_image(self, image_path_batch):
    method get_raw_image_zeros (line 222) | def get_raw_image_zeros(self, image_path_batch=None):
    method preprocess_image (line 229) | def preprocess_image(self, raw_bgr_image):
    method xywh2xyxy (line 277) | def xywh2xyxy(self, origin_h, origin_w, x):
    method post_process (line 305) | def post_process(self, output, origin_h, origin_w):
    method bbox_iou (line 331) | def bbox_iou(self, box1, box2, x1y1x2y2=True):
    method non_max_suppression (line 368) | def non_max_suppression(self, prediction, origin_h, origin_w, conf_thr...
    method sigmoid (line 406) | def sigmoid(self, x):
    method scale_mask (line 409) | def scale_mask(self, mask, ih, iw):
    method process_mask (line 427) | def process_mask(self, output_proto_mask, result_proto_coef, result_bo...
    method draw_mask (line 463) | def draw_mask(self, masks, colors_, im_src, alpha=0.5):
  class inferThread (line 485) | class inferThread(threading.Thread):
    method __init__ (line 486) | def __init__(self, yolo11_wrapper, image_path_batch):
    method run (line 491) | def run(self):
  class warmUpThread (line 501) | class warmUpThread(threading.Thread):
    method __init__ (line 502) | def __init__(self, yolo11_wrapper):
    method run (line 506) | def run(self):
  class Colors (line 511) | class Colors:
    method __init__ (line 512) | def __init__(self):
    method __call__ (line 520) | def __call__(self, i, bgr=False):
    method hex2rgb (line 525) | def hex2rgb(h):  # rgb order (PIL)

FILE: yolov12/gen_wts.py
  function parse_args (line 8) | def parse_args():

FILE: yolov12/include/logging.h
  function class (line 32) | class LogStreamConsumerBuffer : public std::stringbuf {
  function class (line 91) | class LogStreamConsumerBase {
  function std (line 137) | static std::string severityPrefix(Severity severity) {
  function TestResult (line 191) | enum class TestResult {
  function LogStreamConsumer (line 405) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) {
  function LogStreamConsumer (line 416) | inline LogStreamConsumer LOG_INFO(const Logger& logger) {
  function LogStreamConsumer (line 427) | inline LogStreamConsumer LOG_WARN(const Logger& logger) {
  function LogStreamConsumer (line 438) | inline LogStreamConsumer LOG_ERROR(const Logger& logger) {
  function LogStreamConsumer (line 450) | inline LogStreamConsumer LOG_FATAL(const Logger& logger) {

FILE: yolov12/include/types.h
  function Detection (line 4) | struct alignas(float) Detection {
  type AffineMatrix (line 14) | struct AffineMatrix {

FILE: yolov12/include/utils.h
  function cv (line 6) | static inline cv::Mat preprocess_img(cv::Mat& img, int input_w, int inpu...
  function read_files_in_dir (line 28) | static inline int read_files_in_dir(const char* p_dir_name, std::vector<...
  function std (line 51) | static inline std::string trim_leading_whitespace(const std::string& str) {
  function read_labels (line 68) | static inline int read_labels(const std::string labels_filename, std::un...

FILE: yolov12/plugin/yololayer.h
  function namespace (line 9) | namespace nvinfer1 {

FILE: yolov12/src/block.cpp
  function loadWeights (line 10) | std::map<std::string, nvinfer1::Weights> loadWeights(const std::string f...

FILE: yolov12/src/model.cpp
  function get_width (line 9) | static int get_width(int x, float gw, int max_channels, int divisor = 8) {
  function get_depth (line 15) | static int get_depth(int x, float gd) {
  function calculateStrides (line 24) | void calculateStrides(nvinfer1::IElementWiseLayer* conv_layers[], int si...

FILE: yolov12/src/postprocess.cpp
  function get_rect (line 4) | cv::Rect get_rect(cv::Mat& img, float bbox[4]) {
  function get_rect_obb (line 36) | cv::Rect get_rect_obb(cv::Mat& img, float bbox[4]) {
  function get_rect_adapt_landmark (line 68) | cv::Rect get_rect_adapt_landmark(cv::Mat& img, float bbox[4], float lmk[...
  function iou (line 101) | static float iou(float lbox[4], float rbox[4]) {
  function cmp (line 117) | static bool cmp(const Detection& a, const Detection& b) {
  function nms (line 124) | void nms(std::vector<Detection>& res, float* output, float conf_thresh, ...
  function batch_nms (line 153) | void batch_nms(std::vector<std::vector<Detection>>& res_batch, float* ou...
  function process_decode_ptr_host (line 161) | void process_decode_ptr_host(std::vector<Detection>& res, const float* d...
  function batch_process (line 179) | void batch_process(std::vector<std::vector<Detection>>& res_batch, const...
  function draw_bbox (line 190) | void draw_bbox(std::vector<cv::Mat>& img_batch, std::vector<std::vector<...
  function draw_bbox_keypoints_line (line 203) | void draw_bbox_keypoints_line(std::vector<cv::Mat>& img_batch, std::vect...
  function scale_mask (line 237) | cv::Mat scale_mask(cv::Mat mask, cv::Mat img) {
  function draw_mask_bbox (line 258) | void draw_mask_bbox(cv::Mat& img, std::vector<Detection>& dets, std::vec...
  function process_decode_ptr_host_obb (line 303) | void process_decode_ptr_host_obb(std::vector<Detection>& res, const floa...
  function batch_process_obb (line 322) | void batch_process_obb(std::vector<std::vector<Detection>>& res_batch, c...
  function convariance_matrix (line 333) | std::tuple<float, float, float> convariance_matrix(Detection res) {
  function probiou (line 354) | static float probiou(const Detection& res1, const Detection& res2, float...
  function nms_obb (line 387) | void nms_obb(std::vector<Detection>& res, float* output, float conf_thre...
  function batch_nms_obb (line 417) | void batch_nms_obb(std::vector<std::vector<Detection>>& res_batch, float...
  function get_corner (line 425) | static std::vector<cv::Point> get_corner(cv::Mat& img, const Detection& ...
  function draw_bbox_obb (line 504) | void draw_bbox_obb(std::vector<cv::Mat>& img_batch, std::vector<std::vec...

FILE: yolov12/yolo12_det.cpp
  function serialize_engine (line 16) | void serialize_engine(std::string& wts_name, std::string& engine_name, f...
  function deserialize_engine (line 37) | void deserialize_engine(std::string& engine_name, IRuntime** runtime, IC...
  function prepare_buffer (line 62) | void prepare_buffer(ICudaEngine* engine, float** input_buffer_device, fl...
  function infer (line 88) | void infer(IExecutionContext& context, cudaStream_t& stream, void** buff...
  function parse_args (line 115) | bool parse_args(int argc, char** argv, std::string& wts, std::string& en...
  function main (line 162) | int main(int argc, char** argv) {

FILE: yolov13/gen_wts.py
  function parse_args (line 8) | def parse_args():

FILE: yolov13/include/calibrator.h
  function class (line 14) | class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {

FILE: yolov13/include/logging.h
  function class (line 32) | class LogStreamConsumerBuffer : public std::stringbuf {
  function class (line 91) | class LogStreamConsumerBase {
  function std (line 137) | static std::string severityPrefix(Severity severity) {
  function TestResult (line 191) | enum class TestResult {
  function LogStreamConsumer (line 405) | inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) {
  function LogStreamConsumer (line 416) | inline LogStreamConsumer LOG_INFO(const Logger& logger) {
  function LogStreamConsumer (line 427) | inline LogStreamConsumer LOG_WARN(const Logger& logger) {
  function LogStreamConsumer (line 438) | inline LogStreamConsumer LOG_ERROR(const Logger& logger) {
  function LogStreamConsumer (line 450) | inline LogStreamConsumer LOG_FATAL(const Logger& logger) {

FILE: yolov13/include/types.h
  function Detection (line 4) | struct alignas(float) Detection {
  type AffineMatrix (line 11) | struct AffineMatrix {

FILE: yolov13/include/utils.h
  function cv (line 6) | static inline cv::Mat preprocess_img(cv::Mat& img, int input_w, int inpu...
  function read_files_in_dir (line 28) | static inline int read_files_in_dir(const char* p_dir_name, std::vector<...
  function std (line 51) | static inline std::string trim_leading_whitespace(const std::string& str) {
  function read_labels (line 68) | static inline int read_labels(const std::string labels_filename, std::un...

FILE: yolov13/plugin/yololayer.h
  function namespace (line 9) | namespace nvinfer1 {

FILE: yolov13/src/block.cpp
  function loadWeights (line 10) | std::map<std::string, nvinfer1::Weights> loadWeights(const std::string f...
  function cout_dim (line 291) | void cout_dim(nvinfer1::ITensor& input) {

FILE: yolov13/src/model.cpp
  function get_width (line 9) | static int get_width(int x, float gw, int max_channels, int divisor = 8) {
  function get_depth (line 15) | static int get_depth(int x, float gd) {
  function calculateStrides (line 25) | void calculateStrides(nvinfer1::IElementWiseLayer* conv_layers[], int si...
  function calculateStrides (line 34) | void calculateStrides(nvinfer1::ILayer* conv_layers[], int size, int ref...

FILE: yolov13/src/postprocess.cpp
  function get_rect (line 4) | cv::Rect get_rect(cv::Mat& img, float bbox[4]) {
  function iou (line 36) | static float iou(float lbox[4], float rbox[4]) {
  function cmp (line 52) | static bool cmp(const Detection& a, const Detection& b) {
  function nms (line 59) | void nms(std::vector<Detection>& res, float* output, float conf_thresh, ...
  function batch_nms (line 88) | void batch_nms(std::vector<std::vector<Detection>>& res_batch, float* ou...
  function process_decode_ptr_host (line 96) | void process_decode_ptr_host(std::vector<Detection>& res, const float* d...
  function batch_process (line 114) | void batch_process(std::vector<std::vector<Detection>>& res_batch, const...
  function draw_bbox (line 125) | void draw_bbox(std::vector<cv::Mat>& img_batch, std::vector<std::vector<...

FILE: yolov13/yolov13_det.cpp
  function get_executable_dir (line 24) | static std::string get_executable_dir() {
  function serialize_engine (line 49) | void serialize_engine(std::string& wts_name, std::stri

Download .json

Condensed preview — 744 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (6,153K chars).

[
  {
    "path": ".clang-format",
    "chars": 2602,
    "preview": "# Google C/C++ Code Style settings (with 4-space)\n# Refered to https://github.com/kehanXue/google-style-clang-format/blo"
  },
  {
    "path": ".cmake-format.yaml",
    "chars": 9269,
    "preview": "_help_parse: Options affecting listfile parsing\nparse:\n  _help_additional_commands:\n  - Specify structure for custom cma"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/tensorrtx-issue-template.md",
    "chars": 461,
    "preview": "---\nname: tensorrtx issue template\nabout: To understand your issue better\ntitle: ''\nlabels: ''\nassignees: ''\n\n---\n\n## En"
  },
  {
    "path": ".github/stale.yml",
    "chars": 684,
    "preview": "# Number of days of inactivity before an issue becomes stale\ndaysUntilStale: 60\n# Number of days of inactivity before a "
  },
  {
    "path": ".github/workflows/pre-commit.yml",
    "chars": 1060,
    "preview": "name: pre-commit\n\non:\n  pull_request:\n    branches:\n      - master\n      - trt10\n\n  push:\n    branches:\n      - master\n "
  },
  {
    "path": ".gitignore",
    "chars": 946,
    "preview": "models\nbuild\n*.wts\n*.engine\n*.tpymodel\n*/*.ppm\n*idea*\n\n.vscode/*\n!.vscode/settings.json\n!.vscode/tasks.json\n!.vscode/lau"
  },
  {
    "path": ".pre-commit-config.yaml",
    "chars": 876,
    "preview": "repos:\n  - repo: https://github.com/pre-commit/pre-commit-hooks\n    rev: v4.5.0\n    hooks:\n      - id: check-merge-confl"
  },
  {
    "path": "LICENSE",
    "chars": 1072,
    "preview": "MIT License\n\nCopyright (c) 2019-2020 Wang Xinyu\n\nPermission is hereby granted, free of charge, to any person obtaining a"
  },
  {
    "path": "README.md",
    "chars": 27616,
    "preview": "# TensorRTx\n\nTensorRTx aims to implement popular deep learning networks with TensorRT network definition API.\n\nWhy don't"
  },
  {
    "path": "alexnet/CMakeLists.txt",
    "chars": 1102,
    "preview": "cmake_minimum_required(VERSION 3.14)\n\nproject(\n  alexnet\n  VERSION 0.1\n  LANGUAGES C CXX CUDA)\n\nif(NOT DEFINED CMAKE_CUD"
  },
  {
    "path": "alexnet/FindTensorRT.cmake",
    "chars": 4430,
    "preview": "cmake_minimum_required(VERSION 3.17.0)\n\nfunction(_guess_path var_name required_files)\n  set(_result \"\")\n\n  foreach(path_"
  },
  {
    "path": "alexnet/README.md",
    "chars": 4087,
    "preview": "# alexnet\n\n## Introduction\n\nAlexNet model architecture comes from this paper: [One weird trick for parallelizing convolu"
  },
  {
    "path": "alexnet/alexnet.cc",
    "chars": 13851,
    "preview": "#include <array>\n#include <chrono>\n#include <cmath>\n#include <opencv2/opencv.hpp>\n#include <vector>\n#include \"logging.h\""
  },
  {
    "path": "alexnet/alexnet.py",
    "chars": 8916,
    "preview": "import os\nimport sys\nimport struct\nimport argparse\n\nimport numpy as np\nimport pycuda.autoinit\nimport pycuda.driver as cu"
  },
  {
    "path": "alexnet/gen_wts.py",
    "chars": 1978,
    "preview": "import struct\n\nimport cv2\nimport numpy as np\nimport torch\nfrom torchvision.models import alexnet\n\n\ndef read_imagenet_lab"
  },
  {
    "path": "alexnet/logging.h",
    "chars": 16988,
    "preview": "/*\n * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 "
  },
  {
    "path": "alexnet/macros.h",
    "chars": 564,
    "preview": "#pragma once\n\n#include <NvInfer.h>\n\n#ifdef API_EXPORTS\n#if defined(_MSC_VER)\n#define API __declspec(dllexport)\n#else\n#de"
  },
  {
    "path": "alexnet/utils.h",
    "chars": 8435,
    "preview": "#pragma once\n#include <cuda_runtime_api.h>\n#include <algorithm>\n#include <cassert>\n#include <cstddef>\n#include <fstream>"
  },
  {
    "path": "arcface/CMakeLists.txt",
    "chars": 1699,
    "preview": "cmake_minimum_required(VERSION 2.6)\n\nproject(arcface)\n\nadd_definitions(-std=c++11)\n\noption(CUDA_USE_STATIC_CUDA_RUNTIME "
  },
  {
    "path": "arcface/README.md",
    "chars": 3105,
    "preview": "# arcface\n### TensortRT 8\n\nThe mxnet implementation is from [deepinsight/insightface.](https://github.com/deepinsight/in"
  },
  {
    "path": "arcface/arcface-mobilefacenet.cpp",
    "chars": 19223,
    "preview": "#include <fstream>\r\n#include <iostream>\r\n#include <map>\r\n#include <sstream>\r\n#include <vector>\r\n#include <chrono>\r\n#incl"
  },
  {
    "path": "arcface/arcface-r100.cpp",
    "chars": 20447,
    "preview": "#include <fstream>\r\n#include <iostream>\r\n#include <map>\r\n#include <sstream>\r\n#include <vector>\r\n#include <chrono>\r\n#incl"
  },
  {
    "path": "arcface/arcface-r50.cpp",
    "chars": 17544,
    "preview": "#include <fstream>\n#include <iostream>\n#include <map>\n#include <sstream>\n#include <vector>\n#include <chrono>\n#include <o"
  },
  {
    "path": "arcface/gen_wts.py",
    "chars": 1416,
    "preview": "import struct\nimport sys\nimport argparse\nimport face_model\nimport cv2\nimport numpy as np\n\nparser = argparse.ArgumentPars"
  },
  {
    "path": "arcface/logging.h",
    "chars": 16675,
    "preview": "/*\n * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 "
  },
  {
    "path": "arcface/macros.h",
    "chars": 210,
    "preview": "#ifndef __MACROS_H\n#define __MACROS_H\n\n#if NV_TENSORRT_MAJOR >= 8\n#define TRT_NOEXCEPT noexcept\n#define TRT_CONST_ENQUEU"
  },
  {
    "path": "arcface/prelu.cu",
    "chars": 6996,
    "preview": "#include <cmath>\n#include <stdio.h>\n#include <cassert>\n#include <iostream>\n#include \"prelu.h\"\n\nnamespace nvinfer1\n{\n    "
  },
  {
    "path": "arcface/prelu.h",
    "chars": 3954,
    "preview": "#ifndef _PRELU_PLUGIN_H\n#define _PRELU_PLUGIN_H\n\n#include <string>\n#include <vector>\n#include \"NvInfer.h\"\n#include \"macr"
  },
  {
    "path": "centernet/README.md",
    "chars": 1142,
    "preview": "# CenterNet\n\nThis is the trt implementation of detection model [ctdet_coco_dla_2x](https://drive.google.com/open?id=1pl_"
  },
  {
    "path": "centernet/centernet.py",
    "chars": 16029,
    "preview": "import numpy as np\n\nimport tensorrt as trt\nimport torch\n\nfrom sample import common\nimport argparse\nimport time\n\n# You ca"
  },
  {
    "path": "centernet/dcnv2Plugin/CMakeLists.txt",
    "chars": 867,
    "preview": "#\n# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the"
  },
  {
    "path": "centernet/dcnv2Plugin/dcn_v2_im2col_cuda.cu",
    "chars": 19792,
    "preview": "#include \"dcn_v2_im2col_cuda.h\"\n#include <cstdio>\n#include <algorithm>\n#include <cstring>\n\n#define CUDA_KERNEL_LOOP(i, n"
  },
  {
    "path": "centernet/dcnv2Plugin/dcn_v2_im2col_cuda.h",
    "chars": 5246,
    "preview": "/*!\n ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************\n *\n * COPYRIGHT\n *\n * All contribu"
  },
  {
    "path": "centernet/dcnv2Plugin/dcnv2Plugin.cpp",
    "chars": 14744,
    "preview": "#include \"dcnv2Plugin.h\"\n#include <iostream>\n\nusing namespace nvinfer1;\nusing nvinfer1::plugin::DeformableConvolutionalL"
  },
  {
    "path": "centernet/dcnv2Plugin/dcnv2Plugin.h",
    "chars": 4439,
    "preview": "/*\n * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 "
  },
  {
    "path": "centernet/sample/common.py",
    "chars": 9637,
    "preview": "#\n# Copyright 1993-2020 NVIDIA Corporation.  All rights reserved.\n#\n# NOTICE TO LICENSEE:\n#\n# This source code and/or do"
  },
  {
    "path": "centernet/sample/test.py",
    "chars": 4660,
    "preview": "import cv2 as cv\nimport numpy as np\n\nimport tensorrt as trt\nimport common\n\nimport torch\nimport time\nfrom sys import argv"
  },
  {
    "path": "contributing.md",
    "chars": 734,
    "preview": "# How to Contribute\n\n1. Fork this repo to your github account\n\n2. Clone your fork\n\n3. Create a feature branch\n\n4. Make c"
  },
  {
    "path": "convnextv2/CMakeLists.txt",
    "chars": 1254,
    "preview": "cmake_minimum_required(VERSION 3.10)\nproject(convnextv2)\n\nfind_package(CUDA REQUIRED)\nfind_package(OpenCV REQUIRED)\n\ninc"
  },
  {
    "path": "convnextv2/README.md",
    "chars": 1154,
    "preview": "# ConvNeXtV2 TensorRT\n\n## Environment\n\n- ubuntu20.04\n-  cuda11.8\n-  cudnn8.9.7\n-  TensorRT8.6.1.6\n-  OpenCV4.13\n\n## Supp"
  },
  {
    "path": "convnextv2/config.yaml",
    "chars": 594,
    "preview": "# ConvNeXtV2 Configuration\n\n# Model variants reference:\n# Atto:  depths: [2, 2, 6, 2], dims: [40, 80, 160, 320]\n# Femto:"
  },
  {
    "path": "convnextv2/gen_wts.py",
    "chars": 2314,
    "preview": "import torch\nimport struct\n\n\ndef gen_wts(model_path, wts_path):\n    print(f\"Loading {model_path}...\")\n    try:\n        d"
  },
  {
    "path": "convnextv2/inference.py",
    "chars": 4743,
    "preview": "import tensorrt as trt\nimport pycuda.driver as cuda\nimport pycuda.autoinit  # noqa: F401\nimport numpy as np\nimport cv2\ni"
  },
  {
    "path": "convnextv2/src/LayerNormPlugin.cu",
    "chars": 9340,
    "preview": "#include <cuda_fp16.h>\n#include <cassert>\n#include <cstring>\n#include <cub/cub.cuh>\n#include <iostream>\n#include \"LayerN"
  },
  {
    "path": "convnextv2/src/LayerNormPlugin.h",
    "chars": 3110,
    "preview": "#ifndef LAYER_NORM_PLUGIN_H\n#define LAYER_NORM_PLUGIN_H\n\n#include <NvInfer.h>\n#include <string>\n#include <vector>\n\nusing"
  },
  {
    "path": "convnextv2/src/convnextv2.cpp",
    "chars": 22346,
    "preview": "#include <cuda_runtime_api.h>\n#include <algorithm>\n#include <cassert>\n#include <cmath>\n#include <fstream>\n#include <iost"
  },
  {
    "path": "convnextv2/src/inference_cpp.cpp",
    "chars": 6035,
    "preview": "#include <cuda_runtime_api.h>\n#include <fstream>\n#include <iostream>\n#include <opencv2/opencv.hpp>\n#include <vector>\n#in"
  },
  {
    "path": "convnextv2/src/logging.h",
    "chars": 990,
    "preview": "#ifndef LOGGING_H\n#define LOGGING_H\n\n#include <NvInfer.h>\n#include <iostream>\n\nusing namespace nvinfer1;\n\nclass Logger :"
  },
  {
    "path": "crnn/CMakeLists.txt",
    "chars": 879,
    "preview": "cmake_minimum_required(VERSION 2.6)\n\nproject(crnn)\n\nadd_definitions(-std=c++11)\n\noption(CUDA_USE_STATIC_CUDA_RUNTIME OFF"
  },
  {
    "path": "crnn/README.md",
    "chars": 1003,
    "preview": "# crnn\n\nThe Pytorch implementation is [meijieru/crnn.pytorch](https://github.com/meijieru/crnn.pytorch).\n\n## How to Run\n"
  },
  {
    "path": "crnn/crnn.cpp",
    "chars": 18418,
    "preview": "#include <iostream>\r\n#include <chrono>\r\n#include <map>\r\n#include <opencv2/opencv.hpp>\r\n#include \"NvInfer.h\"\r\n#include \"c"
  },
  {
    "path": "crnn/genwts.py",
    "chars": 859,
    "preview": "import torch\nfrom torch.autograd import Variable\nimport utils\nimport models.crnn as crnn\nimport struct\n\nmodel_path = './"
  },
  {
    "path": "crnn/logging.h",
    "chars": 16550,
    "preview": "/*\n * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 "
  },
  {
    "path": "csrnet/CMakeLists.txt",
    "chars": 667,
    "preview": "cmake_minimum_required(VERSION 3.10)\n\nproject(csrnet)\n\nadd_definitions(-std=c++11)\nadd_definitions(-DAPI_EXPORTS)\noption"
  },
  {
    "path": "csrnet/README.md",
    "chars": 1451,
    "preview": "# csrnet\n\nThe Pytorch implementation is [leeyeehoo/CSRNet-pytorch](https://github.com/leeyeehoo/CSRNet-pytorch).\n\nThis r"
  },
  {
    "path": "csrnet/config.h",
    "chars": 518,
    "preview": "#pragma once\n\nconst static char *kInputTensorName = \"data\";\nconst static char *kOutputTensorName = \"prob\";\nconst static "
  },
  {
    "path": "csrnet/csrnet.cpp",
    "chars": 19707,
    "preview": "#include \"NvInfer.h\"\n#include \"cuda_runtime_api.h\"\n#include <chrono>\n#include <config.h>\n#include <cstring>\n#include <di"
  },
  {
    "path": "csrnet/gen_wts.py",
    "chars": 845,
    "preview": "from torch.nn.modules import module\nfrom model import CSRNet\nimport torch\nimport os\nimport struct\n\n\nsave_path = os.path."
  },
  {
    "path": "csrnet/logging.h",
    "chars": 16129,
    "preview": "/*\n * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 "
  },
  {
    "path": "csrnet/macros.h",
    "chars": 211,
    "preview": "#ifndef __MACROS_H\n#define __MACROS_H\n\n#if NV_TENSORRT_MAJOR >= 8\n#define TRT_NOEXCEPT noexcept\n#define TRT_CONST_ENQUEU"
  },
  {
    "path": "dbnet/CMakeLists.txt",
    "chars": 850,
    "preview": "cmake_minimum_required(VERSION 2.6)\n\nproject(dbnet)\n\nadd_definitions(-std=c++11)\n\noption(CUDA_USE_STATIC_CUDA_RUNTIME OF"
  },
  {
    "path": "dbnet/README.md",
    "chars": 1664,
    "preview": "# DBNet\n\nThe Pytorch implementation is [DBNet](https://github.com/BaofengZan/DBNet.pytorch).\n\n<p align=\"center\">\n<img sr"
  },
  {
    "path": "dbnet/clipper/CMakeLists.txt",
    "chars": 118,
    "preview": "cmake_minimum_required(VERSION 2.6)\n\naux_source_directory(. DIR_CLIPPER_SRCS)\nadd_library(clipper ${DIR_CLIPPER_SRCS})"
  },
  {
    "path": "dbnet/clipper/clipper.cpp",
    "chars": 137555,
    "preview": "/*******************************************************************************\n*                                      "
  },
  {
    "path": "dbnet/clipper/clipper.hpp",
    "chars": 15017,
    "preview": "/*******************************************************************************\n*                                      "
  },
  {
    "path": "dbnet/common.hpp",
    "chars": 6458,
    "preview": "#ifndef DBNET_COMMON_H_\n#define DBNET_COMMON_H_\n\n#include <iostream>\n#include <fstream>\n#include <map>\n#include <sstream"
  },
  {
    "path": "dbnet/dbnet.cpp",
    "chars": 23657,
    "preview": "#include <iostream>\n#include <chrono>\n#include \"cuda_runtime_api.h\"\n#include \"logging.h\"\n#include \"common.hpp\"\n#include "
  },
  {
    "path": "dbnet/logging.h",
    "chars": 16550,
    "preview": "/*\n * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 "
  },
  {
    "path": "dbnet/utils.h",
    "chars": 3146,
    "preview": "#ifndef __TRT_UTILS_H_\n#define __TRT_UTILS_H_\n\n#include <iostream>\n#include <vector>\n#include <algorithm>\n#include <cudn"
  },
  {
    "path": "densenet/CMakeLists.txt",
    "chars": 901,
    "preview": "cmake_minimum_required(VERSION 3.10)\n\n# set the project name\nproject(densenet)\n\nadd_definitions(-std=c++11)\n\n# get main "
  },
  {
    "path": "densenet/README.md",
    "chars": 1030,
    "preview": "# Densenet121\n\nThe Pytorch implementation is [makaveli10/densenet](https://github.com/makaveli10/torchtrtz/tree/main/den"
  },
  {
    "path": "densenet/densenet121.cpp",
    "chars": 14504,
    "preview": "#include \"NvInfer.h\"\n#include \"cuda_runtime_api.h\"\n#include \"logging.h\"\n#include <fstream>\n#include <iostream>\n#include "
  },
  {
    "path": "densenet/densenet121.py",
    "chars": 10194,
    "preview": "import os\nimport sys\nimport struct\nimport argparse\n\nimport numpy as np\nimport pycuda.autoinit\nimport pycuda.driver as cu"
  },
  {
    "path": "densenet/logging.h",
    "chars": 16650,
    "preview": "/*\n * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 "
  },
  {
    "path": "detr/CMakeLists.txt",
    "chars": 929,
    "preview": "cmake_minimum_required(VERSION 2.6)\n\nproject(detr)\n\nadd_definitions(-std=c++11)\n\noption(CUDA_USE_STATIC_CUDA_RUNTIME OFF"
  },
  {
    "path": "detr/README.md",
    "chars": 2781,
    "preview": "# DETR\n\nThe Pytorch implementation is [facebookresearch/detr](https://github.com/facebookresearch/detr).\n\nFor details se"
  },
  {
    "path": "detr/backbone.hpp",
    "chars": 9704,
    "preview": "#pragma once\n#include <map>\n#include \"common.hpp\"\n\nenum RESNETTYPE {\n    R18 = 0,\n    R34,\n    R50,\n    R101,\n    R152\n}"
  },
  {
    "path": "detr/calibrator.hpp",
    "chars": 4154,
    "preview": "#pragma once\n\n#include \"NvInfer.h\"\n#include <string>\n#include <vector>\n#include <iostream>\n#include <iterator>\n#include "
  },
  {
    "path": "detr/common.hpp",
    "chars": 2723,
    "preview": "#pragma once\n\n#include <dirent.h>\n#include <cuda_runtime_api.h>\n#include <fstream>\n#include <sstream>\n#include <iostream"
  },
  {
    "path": "detr/detr.cpp",
    "chars": 28017,
    "preview": "#pragma once\n#include <iostream>\n#include <unordered_map>\n#include \"./logging.h\"\n#include \"backbone.hpp\"\n#include \"calib"
  },
  {
    "path": "detr/gen_wts.py",
    "chars": 3935,
    "preview": "import cv2\n\nimport torch\nfrom models.transformer import Transformer\nfrom models.position_encoding import PositionEmbeddi"
  },
  {
    "path": "detr/logging.h",
    "chars": 16386,
    "preview": "/*\n * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 "
  },
  {
    "path": "detr/macros.h",
    "chars": 485,
    "preview": "#ifndef __MACROS_H\n#define __MACROS_H\n\n#include \"NvInfer.h\"\n\n#ifdef API_EXPORTS\n#if defined(_MSC_VER)\n#define API __decl"
  },
  {
    "path": "docker/README.md",
    "chars": 2481,
    "preview": "# Tutorials\n\n## Introduction\n\nThis folder contains the docker and docker-compose file to build the development environme"
  },
  {
    "path": "docker/tensorrtx-docker-compose.yml",
    "chars": 910,
    "preview": "services:\n  tensorrt:\n    image: tensortx:1.0.1\n    container_name: tensortx\n    environment:\n      - NVIDIA_VISIBLE_DEV"
  },
  {
    "path": "docker/x86_64.dockerfile",
    "chars": 1542,
    "preview": "ARG TAG=24.01-py3\n\nFROM nvcr.io/nvidia/tensorrt:${TAG} AS tensorrtx\n\nENV DEBIAN_FRONTEND noninteractive\n\n# basic tools\nR"
  },
  {
    "path": "efficient_ad/CMakeLists.txt",
    "chars": 1399,
    "preview": "cmake_minimum_required(VERSION 3.12)\nproject(EfficientAD-M)\n\nadd_definitions(-w)\nadd_definitions(-D API_EXPORTS)\nset(CMA"
  },
  {
    "path": "efficient_ad/README.md",
    "chars": 1363,
    "preview": "# EfficientAd\n\nEfficientAd: Accurate Visual Anomaly Detection at Millisecond-Level Latencies.\n\nThe Pytorch implementatio"
  },
  {
    "path": "efficient_ad/efficientAD_det.cpp",
    "chars": 9997,
    "preview": "#include <cuda_runtime.h>\n\n#include <chrono>\n#include <cmath>\n#include <cstdint>\n#include <iostream>\n#include <opencv2/o"
  },
  {
    "path": "efficient_ad/src/config.h",
    "chars": 1190,
    "preview": "#pragma once\n\n/* --------------------------------------------------------\n * These configs are related to tensorrt model"
  },
  {
    "path": "efficient_ad/src/cuda_utils.h",
    "chars": 843,
    "preview": "#ifndef TRTX_CUDA_UTILS_H_\n#define TRTX_CUDA_UTILS_H_\n\n#include <cuda_runtime_api.h>\n\n#ifndef CUDA_CHECK\n#define CUDA_CH"
  },
  {
    "path": "efficient_ad/src/logging.h",
    "chars": 16706,
    "preview": "/*\n * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 "
  },
  {
    "path": "efficient_ad/src/macros.h",
    "chars": 484,
    "preview": "#ifndef __MACROS_H\n#define __MACROS_H\n\n#include <NvInfer.h>\n\n#ifdef API_EXPORTS\n#if defined(_MSC_VER)\n#define API __decl"
  },
  {
    "path": "efficient_ad/src/model.cpp",
    "chars": 19148,
    "preview": "#include \"model.h\"\n\n#include <cassert>\n#include <cmath>\n#include <cstring>\n#include <fstream>\n#include <iostream>\n#inclu"
  },
  {
    "path": "efficient_ad/src/model.h",
    "chars": 358,
    "preview": "#pragma once\n\n#include <NvInfer.h>\n\n#include <string>\n\nnvinfer1::ICudaEngine* build_efficientAD_engine(unsigned int maxB"
  },
  {
    "path": "efficient_ad/src/postprocess.h",
    "chars": 280,
    "preview": "#pragma once\n\n#include <opencv2/opencv.hpp>\n\nvoid genHeatMap(cv::Mat originImg, cv::Mat& anomalyGrayMap, cv::Mat& HeatMa"
  },
  {
    "path": "efficient_ad/src/utils.h",
    "chars": 812,
    "preview": "#pragma once\n\n#include <dirent.h>\n#include <cstring>\n#include <fstream>\n#include <sstream>\n#include <string>\n#include <u"
  },
  {
    "path": "efficientnet/CMakeLists.txt",
    "chars": 744,
    "preview": "cmake_minimum_required(VERSION 2.6)\n\nproject(efficientnet)\n\nadd_definitions(-std=c++11)\n\noption(CUDA_USE_STATIC_CUDA_RUN"
  },
  {
    "path": "efficientnet/README.md",
    "chars": 916,
    "preview": "# EfficientNet\n\nA TensorRT implementation of EfficientNet.\nFor the Pytorch implementation, you can refer to [EfficientNe"
  },
  {
    "path": "efficientnet/efficientnet.cpp",
    "chars": 10039,
    "preview": "#include \"NvInfer.h\"\n#include \"cuda_runtime_api.h\"\n#include \"logging.h\"\n#include <fstream>\n#include <iostream>\n#include "
  },
  {
    "path": "efficientnet/gen_wts.py",
    "chars": 471,
    "preview": "import torch\nimport struct\nfrom efficientnet_pytorch import EfficientNet\nmodel = EfficientNet.from_pretrained('efficient"
  },
  {
    "path": "efficientnet/logging.h",
    "chars": 16550,
    "preview": "/*\n * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 "
  },
  {
    "path": "efficientnet/utils.hpp",
    "chars": 9439,
    "preview": "#include \"NvInfer.h\"\n#include \"cuda_runtime_api.h\"\n#include \"logging.h\"\n#include <math.h>\n#include <string>\n#include <al"
  },
  {
    "path": "ghostnet/README.md",
    "chars": 2364,
    "preview": "# GhostNet\r\n\r\nGhostNetv1 architecture is from the paper \"GhostNet: More Features from Cheap Operations\" [(https://arxiv."
  },
  {
    "path": "ghostnet/ghostnetv1/CMakeLists.txt",
    "chars": 727,
    "preview": "cmake_minimum_required(VERSION 2.6)\r\n\r\nproject(ghostnetv1)\r\n\r\nadd_definitions(-std=c++11)\r\n\r\noption(CUDA_USE_STATIC_CUDA"
  },
  {
    "path": "ghostnet/ghostnetv1/gen_wts.py",
    "chars": 9593,
    "preview": "\"\"\"\r\nCreates a GhostNet Model as defined in:\r\nGhostNet: More Features from Cheap Operations By Kai Han, Yunhe Wang, Qi T"
  },
  {
    "path": "ghostnet/ghostnetv1/ghostnetv1.cpp",
    "chars": 22070,
    "preview": "#include <chrono>\r\n#include <cmath>\r\n#include <fstream>\r\n#include <iostream>\r\n#include <map>\r\n#include <sstream>\r\n#inclu"
  },
  {
    "path": "ghostnet/ghostnetv1/logging.h",
    "chars": 17137,
    "preview": "/*\r\n * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.\r\n *\r\n * Licensed under the Apache License, Version 2"
  },
  {
    "path": "ghostnet/ghostnetv2/CMakeLists.txt",
    "chars": 727,
    "preview": "cmake_minimum_required(VERSION 2.6)\r\n\r\nproject(ghostnetv2)\r\n\r\nadd_definitions(-std=c++11)\r\n\r\noption(CUDA_USE_STATIC_CUDA"
  },
  {
    "path": "ghostnet/ghostnetv2/gen_wts.py",
    "chars": 11488,
    "preview": "import torch\r\nimport torch.nn as nn\r\nimport torch.onnx\r\nimport struct\r\n\r\nimport torch\r\nimport torch.nn.functional as F\r\n"
  },
  {
    "path": "ghostnet/ghostnetv2/ghostnetv2.cpp",
    "chars": 24957,
    "preview": "#include <chrono>\r\n#include <cmath>\r\n#include <fstream>\r\n#include <iostream>\r\n#include <map>\r\n#include <sstream>\r\n#inclu"
  },
  {
    "path": "ghostnet/ghostnetv2/logging.h",
    "chars": 17137,
    "preview": "/*\r\n * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.\r\n *\r\n * Licensed under the Apache License, Version 2"
  },
  {
    "path": "googlenet/CMakeLists.txt",
    "chars": 1044,
    "preview": "cmake_minimum_required(VERSION 3.14)\n\nproject(\n  googlenet\n  VERSION 0.1\n  LANGUAGES C CXX CUDA)\n\nif(NOT DEFINED CMAKE_C"
  },
  {
    "path": "googlenet/FindTensorRT.cmake",
    "chars": 4430,
    "preview": "cmake_minimum_required(VERSION 3.17.0)\n\nfunction(_guess_path var_name required_files)\n  set(_result \"\")\n\n  foreach(path_"
  },
  {
    "path": "googlenet/README.md",
    "chars": 1142,
    "preview": "# Googlenet\n\n## Introduction\n\nGoogLeNet (Inception v1) model architecture from [Going Deeper with Convolutions](http://a"
  },
  {
    "path": "googlenet/gen_wts.py",
    "chars": 2095,
    "preview": "import struct\n\nimport cv2\nimport numpy as np\nimport torch\nfrom torchvision.models.googlenet import googlenet\n\n\ndef read_"
  },
  {
    "path": "googlenet/googlenet.cpp",
    "chars": 16685,
    "preview": "#include <NvInfer.h>\n#include <cassert>\n#include <chrono>\n#include <cmath>\n#include <opencv2/opencv.hpp>\n#include <vecto"
  },
  {
    "path": "googlenet/logging.h",
    "chars": 16988,
    "preview": "/*\n * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 "
  },
  {
    "path": "googlenet/macros.h",
    "chars": 648,
    "preview": "#pragma once\n#include <NvInfer.h>\n\n#ifdef API_EXPORTS\n#if defined(_MSC_VER)\n#define API __declspec(dllexport)\n#else\n#def"
  },
  {
    "path": "googlenet/utils.h",
    "chars": 8398,
    "preview": "#pragma once\n#include <cuda_runtime_api.h>\n#include <algorithm>\n#include <cassert>\n#include <fstream>\n#include <iostream"
  },
  {
    "path": "hrnet/hrnet-image-classification/CMakeLists.txt",
    "chars": 562,
    "preview": "cmake_minimum_required(VERSION 2.6)\n\nproject(hrnet)\n\nadd_definitions(-std=c++11)\n\noption(CUDA_USE_STATIC_CUDA_RUNTIME OF"
  },
  {
    "path": "hrnet/hrnet-image-classification/README.md",
    "chars": 1209,
    "preview": "# HRNet\n\nThe Pytorch implementation is [HRNet-Image-Classification](https://github.com/HRNet/HRNet-Image-Classification)"
  },
  {
    "path": "hrnet/hrnet-image-classification/common.hpp",
    "chars": 13034,
    "preview": "#pragma once\n\n#include <fstream>\n#include <map>\n#include <sstream>\n#include <vector>\n#include <opencv2/opencv.hpp>\n#incl"
  },
  {
    "path": "hrnet/hrnet-image-classification/demo.py",
    "chars": 4320,
    "preview": "# ------------------------------------------------------------------------------\n# -------------------------------------"
  },
  {
    "path": "hrnet/hrnet-image-classification/hrnet.cpp",
    "chars": 48607,
    "preview": "#include <fstream>\n#include <iostream>\n#include <map>\n#include <sstream>\n#include <vector>\n#include <chrono>\n#include \"c"
  },
  {
    "path": "hrnet/hrnet-image-classification/logging.h",
    "chars": 16550,
    "preview": "/*\n * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 "
  },
  {
    "path": "hrnet/hrnet-semantic-segmentation/CMakeLists.txt",
    "chars": 1124,
    "preview": "cmake_minimum_required(VERSION 2.6)\n\nproject(hrnetseg)\n\nadd_definitions(-std=c++11)\n\noption(CUDA_USE_STATIC_CUDA_RUNTIME"
  },
  {
    "path": "hrnet/hrnet-semantic-segmentation/README.md",
    "chars": 3271,
    "preview": "# HRNet-Semantic-Segmentation\n\nThis repo implemtents [HRNet-Semantic-Segmentation-v1.1](https://github.com/HRNet/HRNet-S"
  },
  {
    "path": "hrnet/hrnet-semantic-segmentation/common.hpp",
    "chars": 15941,
    "preview": "#pragma once\n\n#include <fstream>\n#include <map>\n#include <sstream>\n#include <vector>\n#include <opencv2/opencv.hpp>\n#incl"
  },
  {
    "path": "hrnet/hrnet-semantic-segmentation/gen_wts.py",
    "chars": 1741,
    "preview": "import argparse\nimport struct\n\nimport _init_paths\nimport models\nimport torch\nfrom config import config, update_config\n\n\n"
  },
  {
    "path": "hrnet/hrnet-semantic-segmentation/hrnet.cpp",
    "chars": 41712,
    "preview": "#include <fstream>\n#include <iostream>\n#include <map>\n#include <sstream>\n#include <vector>\n#include <chrono>\n#include \"c"
  },
  {
    "path": "hrnet/hrnet-semantic-segmentation/hrnet_ocr.cpp",
    "chars": 47536,
    "preview": "#include <fstream>\n#include <iostream>\n#include <map>\n#include <sstream>\n#include <vector>\n#include <chrono>\n#include \"c"
  },
  {
    "path": "hrnet/hrnet-semantic-segmentation/hrnet_trt.py",
    "chars": 7715,
    "preview": "\"\"\"\nAn example that uses TensorRT's Python api to make inferences for hrnet.\n\"\"\"\nimport os\nimport shutil\nimport random\ni"
  },
  {
    "path": "hrnet/hrnet-semantic-segmentation/logging.h",
    "chars": 16550,
    "preview": "/*\n * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 "
  },
  {
    "path": "ibnnet/CMakeLists.txt",
    "chars": 964,
    "preview": "cmake_minimum_required(VERSION 2.6)\n\nproject(IBNNet)\n\nadd_definitions(-std=c++11)\n\noption(CUDA_USE_STATIC_CUDA_RUNTIME O"
  },
  {
    "path": "ibnnet/InferenceEngine.cpp",
    "chars": 3718,
    "preview": "#include \"InferenceEngine.h\"\n\nnamespace trt {\n\n   InferenceEngine::InferenceEngine(const EngineConfig &enginecfg): _engi"
  },
  {
    "path": "ibnnet/InferenceEngine.h",
    "chars": 2325,
    "preview": "/**************************************************************************\n * Handle memory pre-alloc\n * both on host(p"
  },
  {
    "path": "ibnnet/README.md",
    "chars": 998,
    "preview": "# IBN-Net\n\nAn implementation of IBN-Net, proposed in [\"Two at Once: Enhancing Learning and Generalization Capacities via"
  },
  {
    "path": "ibnnet/gen_wts.py",
    "chars": 837,
    "preview": "import torch\nimport os\nimport sys\nimport struct\n\n\nassert sys.argv[1] == \"a\" or sys.argv[1] == \"b\"\nmodel_name = \"resnet50"
  },
  {
    "path": "ibnnet/holder.h",
    "chars": 1060,
    "preview": "#pragma once\n\ntemplate <typename T>\nclass TensorRTHolder {\n    T* holder;\npublic:\n    explicit TensorRTHolder(T* holder_"
  },
  {
    "path": "ibnnet/ibnnet.cpp",
    "chars": 8424,
    "preview": "#include \"ibnnet.h\"\n\n//#define USE_FP16\n\nnamespace trt {\n\n    IBNNet::IBNNet(trt::EngineConfig &enginecfg, const IBN ibn"
  },
  {
    "path": "ibnnet/ibnnet.h",
    "chars": 1113,
    "preview": "#pragma once\n\n#include \"utils.h\"\n#include \"holder.h\"\n#include \"layers.h\"\n#include \"InferenceEngine.h\"\n#include <memory>\n"
  },
  {
    "path": "ibnnet/layers.cpp",
    "chars": 9103,
    "preview": "#include \"layers.h\"\n\nnamespace trtxapi {\n\n    ITensor* MeanStd(INetworkDefinition *network, std::map<std::string, Weight"
  },
  {
    "path": "ibnnet/layers.h",
    "chars": 1277,
    "preview": "#pragma once\n\n#include <map>\n#include <math.h>\n#include <assert.h>\n#include \"NvInfer.h\"\n#include \"cuda_runtime_api.h\"\nus"
  },
  {
    "path": "ibnnet/logging.h",
    "chars": 16550,
    "preview": "/*\n * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 "
  },
  {
    "path": "ibnnet/main.cpp",
    "chars": 2942,
    "preview": "#include <thread>\n#include <vector>\n#include <memory>\n#include \"ibnnet.h\"\n#include \"InferenceEngine.h\"\n\n// stuff we know"
  },
  {
    "path": "ibnnet/utils.cpp",
    "chars": 1157,
    "preview": "#include \"utils.h\"\n\n// Load weights from files shared with TensorRT samples.\n// TensorRT weight files have a simple spac"
  },
  {
    "path": "ibnnet/utils.h",
    "chars": 897,
    "preview": "#pragma once\n\n#include <map>\n#include \"NvInfer.h\"\n#include \"cuda_runtime_api.h\"\n#include \"assert.h\"\n#include <fstream>\n#"
  },
  {
    "path": "inception/inceptionv3/CMakeLists.txt",
    "chars": 702,
    "preview": "cmake_minimum_required(VERSION 2.6)\n\nproject(inception)\n\nadd_definitions(-std=c++11)\n\noption(CUDA_USE_STATIC_CUDA_RUNTIM"
  },
  {
    "path": "inception/inceptionv3/README.md",
    "chars": 1129,
    "preview": "# Inception v3\n\nInception v3 model architecture from \"Rethinking the Inception Architecture for Computer Vision\" <http:/"
  },
  {
    "path": "inception/inceptionv3/inception_v3.cpp",
    "chars": 20434,
    "preview": "#include \"NvInfer.h\"\n#include \"cuda_runtime_api.h\"\n#include \"logging.h\"\n#include <fstream>\n#include <iostream>\n#include "
  },
  {
    "path": "inception/inceptionv3/logging.h",
    "chars": 16550,
    "preview": "/*\n * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 "
  },
  {
    "path": "inception/inceptionv4/CMakeLists.txt",
    "chars": 989,
    "preview": "cmake_minimum_required(VERSION 2.6)\n\nproject(InceptionV4)\n\nadd_definitions(-std=c++11)\n\noption(CUDA_USE_STATIC_CUDA_RUNT"
  },
  {
    "path": "inception/inceptionv4/README.md",
    "chars": 1254,
    "preview": "# Inception v4\n\nInception v4 model architecture from \"Inception-v4, Inception-ResNet and the Impact of Residual Connecti"
  },
  {
    "path": "inception/inceptionv4/inception_v4.cpp",
    "chars": 8961,
    "preview": "# include \"inception_v4.h\"\n\n\nnamespace trtx {\n    InceptionV4::InceptionV4(const InceptionV4Params &params)\n    : mParam"
  },
  {
    "path": "inception/inceptionv4/inception_v4.h",
    "chars": 1896,
    "preview": "#ifndef TRTX_INCEPTION_NETWORK_H\n#define TRTX_INCEPTION_NETWORK_H\n\n\n#include <memory>\n#include <vector>\n#include <chrono"
  },
  {
    "path": "inception/inceptionv4/layers_api.cpp",
    "chars": 14932,
    "preview": "#include \"layers_api.h\"\n\nnamespace trtxlayers {\n    IScaleLayer* addBatchNorm2d(\n        INetworkDefinition *network, \n "
  },
  {
    "path": "inception/inceptionv4/layers_api.h",
    "chars": 2242,
    "preview": "#ifndef TRTX_LAYERS_API_H\n#define TRTX_LAYERS_API_H\n\n#include <map>\n#include <math.h>\n#include <assert.h>\n#include <iost"
  },
  {
    "path": "inception/inceptionv4/logging.h",
    "chars": 16650,
    "preview": "/*\n * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 "
  },
  {
    "path": "inception/inceptionv4/main.cpp",
    "chars": 2317,
    "preview": "#include \"inception_v4.h\"\n\n\n/**\n * Initializes Inception class params in the \n * InceptionV4Params structure.\n**/\ntrtx::"
  },
  {
    "path": "inception/inceptionv4/utils.cpp",
    "chars": 1151,
    "preview": "# include \"utils.h\"\n\n\n// Load weights from files.\n// TensorRT weight files have a simple space delimited format:\n// [typ"
  },
  {
    "path": "inception/inceptionv4/utils.h",
    "chars": 608,
    "preview": "# ifndef TRTX_UTILS_H\n# define TRTX_UTILS_H\n\n#include <map>\n#include \"NvInfer.h\"\n#include \"cuda_runtime_api.h\"\n#include "
  },
  {
    "path": "lenet/CMakeLists.txt",
    "chars": 1284,
    "preview": "cmake_minimum_required(VERSION 3.17.0)\n\nproject(\n  lenet\n  VERSION 0.1\n  LANGUAGES C CXX CUDA)\n\nif(NOT DEFINED CMAKE_CUD"
  },
  {
    "path": "lenet/FindTensorRT.cmake",
    "chars": 4430,
    "preview": "cmake_minimum_required(VERSION 3.17.0)\n\nfunction(_guess_path var_name required_files)\n  set(_result \"\")\n\n  foreach(path_"
  },
  {
    "path": "lenet/README.md",
    "chars": 1814,
    "preview": "# lenet5\n\nlenet5 is one of the simplest net in this repo. You can learn the basic procedures of building CNN from Tensor"
  },
  {
    "path": "lenet/gen_wts.py",
    "chars": 2855,
    "preview": "import struct\nfrom collections import OrderedDict\n\nimport cv2\nimport numpy as np\nimport torch\nimport torch.nn as nn\n\n\ncl"
  },
  {
    "path": "lenet/lenet.cpp",
    "chars": 13434,
    "preview": "#include <NvInfer.h>\n#include <cassert>\n#include <chrono>\n#include <cmath>\n#include <exception>\n#include <filesystem>\n#i"
  },
  {
    "path": "lenet/lenet.py",
    "chars": 6018,
    "preview": "import argparse\nimport os\nimport struct\nimport sys\n\nimport numpy as np\nimport pycuda.autoinit  # noqa: F401\nimport pycud"
  },
  {
    "path": "lenet/lenet_tripy.py",
    "chars": 2415,
    "preview": "import argparse\nimport os\nimport struct\n\nimport nvtripy as tp\n\nINPUT_SHAPE = (1, 1, 32, 32)\nWEIGHT_PATH = \"lenet5.wts\"\nC"
  },
  {
    "path": "lenet/logging.h",
    "chars": 16988,
    "preview": "/*\n * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 "
  },
  {
    "path": "lenet/macros.h",
    "chars": 648,
    "preview": "#pragma once\n#include <NvInfer.h>\n\n#ifdef API_EXPORTS\n#if defined(_MSC_VER)\n#define API __declspec(dllexport)\n#else\n#def"
  },
  {
    "path": "lenet/utils.h",
    "chars": 3306,
    "preview": "#pragma once\n#include <cuda_runtime_api.h>\n#include <algorithm>\n#include <cassert>\n#include <cstddef>\n#include <cstdint>"
  },
  {
    "path": "lprnet/CMakeLists.txt",
    "chars": 1288,
    "preview": "cmake_minimum_required(VERSION 3.17.0)\n\nproject(\n  lprnet\n  VERSION 0.1\n  LANGUAGES C CXX CUDA)\n\nif(NOT DEFINED CMAKE_CU"
  },
  {
    "path": "lprnet/FindTensorRT.cmake",
    "chars": 4430,
    "preview": "cmake_minimum_required(VERSION 3.17.0)\n\nfunction(_guess_path var_name required_files)\n  set(_result \"\")\n\n  foreach(path_"
  },
  {
    "path": "lprnet/README.md",
    "chars": 1496,
    "preview": "# LPRNet\n\nThe Pytorch implementation is [xuexingyu24/License_Plate_Detection_Pytorch](https://github.com/xuexingyu24/Lic"
  },
  {
    "path": "lprnet/gen_wts.py",
    "chars": 4906,
    "preview": "\"\"\"\nmodel codes are borrowed from:\n`https://github.com/xuexingyu24/License_Plate_Detection_Pytorch/blob/master/LPRNet/mo"
  },
  {
    "path": "lprnet/logging.h",
    "chars": 16988,
    "preview": "/*\n * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 "
  },
  {
    "path": "lprnet/lprnet.cpp",
    "chars": 18551,
    "preview": "#include <NvInfer.h>\n#include <algorithm>\n#include <array>\n#include <chrono>\n#include <cstdint>\n#include <fstream>\n#incl"
  },
  {
    "path": "lprnet/macros.h",
    "chars": 648,
    "preview": "#pragma once\n#include <NvInfer.h>\n\n#ifdef API_EXPORTS\n#if defined(_MSC_VER)\n#define API __declspec(dllexport)\n#else\n#def"
  },
  {
    "path": "lprnet/utils.h",
    "chars": 8402,
    "preview": "#pragma once\n#include <cuda_runtime_api.h>\n#include <algorithm>\n#include <cassert>\n#include <fstream>\n#include <iostream"
  },
  {
    "path": "mlp/CMakeLists.txt",
    "chars": 990,
    "preview": "cmake_minimum_required(VERSION 3.17.0)\n\nproject(\n  mlp\n  VERSION 0.1\n  LANGUAGES C CXX CUDA)\n\nif(NOT DEFINED CMAKE_CUDA_"
  },
  {
    "path": "mlp/FindTensorRT.cmake",
    "chars": 4430,
    "preview": "cmake_minimum_required(VERSION 3.17.0)\n\nfunction(_guess_path var_name required_files)\n  set(_result \"\")\n\n  foreach(path_"
  },
  {
    "path": "mlp/README.md",
    "chars": 1213,
    "preview": "# mlp\n\nMLP is the most basic net in this tensorrtx project for starters. You can learn the basic procedures of building "
  },
  {
    "path": "mlp/logging.h",
    "chars": 16988,
    "preview": "/*\n * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 "
  },
  {
    "path": "mlp/macros.h",
    "chars": 648,
    "preview": "#pragma once\n#include <NvInfer.h>\n\n#ifdef API_EXPORTS\n#if defined(_MSC_VER)\n#define API __declspec(dllexport)\n#else\n#def"
  },
  {
    "path": "mlp/mlp.cpp",
    "chars": 8929,
    "preview": "#include <array>\n#include <chrono>\n#include <iostream>\n#include <numeric>\n#include <vector>\n#include \"logging.h\"\n#includ"
  },
  {
    "path": "mlp/mlp.py",
    "chars": 7782,
    "preview": "import argparse\nimport os\nimport numpy as np\nimport struct\n\n# required for the model creation\nimport tensorrt as trt\n\n# "
  },
  {
    "path": "mlp/utils.h",
    "chars": 2641,
    "preview": "#pragma once\n#include <cuda_runtime_api.h>\n#include <cassert>\n#include <fstream>\n#include <iostream>\n#include <map>\n#inc"
  },
  {
    "path": "mnasnet/CMakeLists.txt",
    "chars": 1114,
    "preview": "cmake_minimum_required(VERSION 3.14)\n\nproject(\n  mnasnet\n  VERSION 0.1\n  LANGUAGES C CXX CUDA)\n\nif(NOT DEFINED CMAKE_CUD"
  },
  {
    "path": "mnasnet/FindTensorRT.cmake",
    "chars": 4430,
    "preview": "cmake_minimum_required(VERSION 3.17.0)\n\nfunction(_guess_path var_name required_files)\n  set(_result \"\")\n\n  foreach(path_"
  },
  {
    "path": "mnasnet/README.md",
    "chars": 1166,
    "preview": "# mnasnet\n\nMNASNet with depth multiplier of 0.5 from\n\"MnasNet: Platform-Aware Neural Architecture Search for Mobile\" <ht"
  },
  {
    "path": "mnasnet/gen_wts.py",
    "chars": 2181,
    "preview": "import struct\n\nimport cv2\nimport numpy as np\nimport torch\nfrom torchvision.models import mnasnet0_5\n\n\nMODELS = [(\"mnasne"
  },
  {
    "path": "mnasnet/logging.h",
    "chars": 16988,
    "preview": "/*\n * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 "
  },
  {
    "path": "mnasnet/macros.h",
    "chars": 648,
    "preview": "#pragma once\n#include <NvInfer.h>\n\n#ifdef API_EXPORTS\n#if defined(_MSC_VER)\n#define API __declspec(dllexport)\n#else\n#def"
  },
  {
    "path": "mnasnet/mnasnet.cpp",
    "chars": 16702,
    "preview": "#include <NvInfer.h>\n#include <chrono>\n#include <cmath>\n#include <fstream>\n#include <iostream>\n#include <map>\n#include <"
  },
  {
    "path": "mnasnet/utils.h",
    "chars": 8434,
    "preview": "#pragma once\n#include <cuda_runtime_api.h>\n#include <algorithm>\n#include <cassert>\n#include <fstream>\n#include <iostream"
  },
  {
    "path": "mobilenet/mobilenetv2/CMakeLists.txt",
    "chars": 701,
    "preview": "cmake_minimum_required(VERSION 2.6)\n\nproject(mobilenet)\n\nadd_definitions(-std=c++11)\n\noption(CUDA_USE_STATIC_CUDA_RUNTIM"
  },
  {
    "path": "mobilenet/mobilenetv2/README.md",
    "chars": 1450,
    "preview": "# mobilenet v2\n\nMobileNetV2 architecture from\n     \"MobileNetV2: Inverted Residuals and Linear Bottlenecks\" <https://arx"
  },
  {
    "path": "mobilenet/mobilenetv2/logging.h",
    "chars": 16673,
    "preview": "/*\n * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 "
  },
  {
    "path": "mobilenet/mobilenetv2/mobilenet_v2.cpp",
    "chars": 15036,
    "preview": "#include <chrono>\n#include <cmath>\n#include <fstream>\n#include <iostream>\n#include <map>\n#include <sstream>\n#include <ve"
  }
]

// ... and 544 more files (download for full content)

About this extraction

This page contains the full source code of the wang-xinyu/tensorrtx GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 744 files (5.6 MB), approximately 1.5M tokens, and a symbol index with 2883 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Extract another repo