Repository: NVIDIA/tensorrt-laboratory
Branch: v2
Commit: 33b6fdf2935c
Files: 406
Total size: 2.2 MB
Directory structure:
gitextract_8sd61058/
├── .bazelrc
├── .clang-format
├── .dockerignore
├── .gitmodules
├── BUILD.bazel
├── CLA
├── CMakeLists.txt
├── CREDITS.md
├── Dockerfile
├── LICENSE
├── README.md
├── WORKSPACE
├── bazel/
│ ├── BUILD.bazel
│ ├── cuda_configure.bzl
│ ├── repositories.bzl
│ └── tensorrt_configure.bzl
├── build.sh
├── cmake/
│ ├── FindTensorRT.cmake
│ ├── Findcpuaff.cmake
│ ├── GRPCGenerateCPP.cmake
│ ├── GRPCGenerateCPPLikeBazel.cmake
│ ├── LibFindMacros.cmake
│ ├── ProtobufGenerateCPPLikeBazel.cmake
│ └── dependencies.cmake
├── devel.sh
├── examples/
│ ├── 00_TensorRT/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── infer.cc
│ │ └── inference.cc
│ ├── 01_Basic_GRPC/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── src/
│ │ ├── async_client.cc
│ │ ├── client.cpp
│ │ └── server.cpp
│ ├── 02_TensorRT_GRPC/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── src/
│ │ ├── async-client.cc
│ │ ├── metrics.cc
│ │ ├── metrics.h
│ │ ├── server.cc
│ │ ├── siege.cc
│ │ └── sync-client.cc
│ ├── 03_Batching/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── inference-batcher.cc
│ │ ├── launch_batching.sh
│ │ ├── simple_batching_client.py
│ │ ├── simple_pb2.py
│ │ ├── simple_pb2_grpc.py
│ │ ├── streaming-service.cc
│ │ └── unary_client.py
│ ├── 04_Middleman/
│ │ ├── CMakeLists.txt
│ │ └── middleman-client.cc
│ ├── 10_Internals/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── internals.cc
│ ├── 11_Protos/
│ │ ├── CMakeLists.txt
│ │ ├── demo/
│ │ │ ├── CMakeLists.txt
│ │ │ ├── dataset.proto
│ │ │ └── inference.proto
│ │ ├── echo/
│ │ │ ├── CMakeLists.txt
│ │ │ └── echo.proto
│ │ └── inference/
│ │ ├── CMakeLists.txt
│ │ ├── api.proto
│ │ ├── model_config.proto
│ │ ├── nvidia_inference.proto
│ │ ├── request_status.proto
│ │ └── server_status.proto
│ ├── 12_ConfigGenerator/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── generator.cc
│ │ ├── link.sh
│ │ └── ms_mgmt
│ ├── 12_FlatBuffers/
│ │ ├── CMakeLists.txt
│ │ ├── client.cc
│ │ ├── example.fbs
│ │ ├── example.grpc.fb.cc
│ │ ├── example.grpc.fb.h
│ │ ├── example_generated.h
│ │ └── server.cc
│ ├── 30_PyTensorRT/
│ │ ├── README.md
│ │ ├── client.py
│ │ ├── compute.py
│ │ ├── infer_test_utils.py
│ │ ├── rebuild.sh
│ │ └── server.py
│ ├── 90_Kubernetes/
│ │ ├── README.md
│ │ ├── bootstrap-minikube.sh
│ │ ├── deploy/
│ │ │ └── build-and-run.sh
│ │ ├── devel/
│ │ │ ├── README.md
│ │ │ └── yais-devel.yml
│ │ ├── istio/
│ │ │ ├── README.md
│ │ │ └── rendered/
│ │ │ ├── istio-v0.8-minikube.yml
│ │ │ └── istio-v1.0-minikube.yml
│ │ ├── minikube/
│ │ │ ├── README.md
│ │ │ └── bootstrap.sh
│ │ ├── prometheus/
│ │ │ ├── bootstrap.sh
│ │ │ ├── custom-settings.yml
│ │ │ ├── service-account.yml
│ │ │ ├── yais-dashboard.json
│ │ │ └── yais-metrics.yml
│ │ └── yais-deploy.yml
│ ├── 91_Prometheus/
│ │ ├── README.md
│ │ └── scrape.conf
│ ├── 97_SingleProcessMultiSteam/
│ │ └── launch_service.sh
│ ├── 98_MultiProcessSingleStream/
│ │ ├── README.md
│ │ ├── run_latency_test
│ │ ├── run_throughput_test
│ │ └── setup.py
│ ├── 99_LoadBalancer/
│ │ ├── README.md
│ │ ├── lb-envoy.j2
│ │ └── run_loadbalancer.py
│ ├── CMakeLists.txt
│ ├── Deployment/
│ │ ├── CMakeLists.txt
│ │ ├── ImageClient/
│ │ │ ├── CMakeLists.txt
│ │ │ ├── api.proto
│ │ │ ├── client.cc
│ │ │ ├── client.h
│ │ │ └── client.py
│ │ ├── Kubernetes/
│ │ │ └── basic-trtis-deployment/
│ │ │ ├── deploy.yml
│ │ │ ├── istio-ingress.yml
│ │ │ └── scrape-metrics.yml
│ │ ├── ObjectStore/
│ │ │ ├── README.md
│ │ │ ├── create_buckets.py
│ │ │ ├── get_rook_s3_keys.sh
│ │ │ ├── ingress-istio.yml
│ │ │ ├── ingress-nginx.yml
│ │ │ └── rook-s3.yml
│ │ ├── README.md
│ │ ├── RouteRequests/
│ │ │ ├── CMakeLists.txt
│ │ │ ├── README.md
│ │ │ ├── envoy_config.yaml
│ │ │ ├── test_client.py
│ │ │ ├── test_routing.sh
│ │ │ └── test_service.cc
│ │ └── batcher.cc
│ ├── ONNX/
│ │ └── resnet50/
│ │ ├── README.md
│ │ ├── build.py
│ │ ├── calibration_images.csv
│ │ ├── calibrator.py
│ │ ├── fetch.sh
│ │ ├── imagenet_labels.py
│ │ ├── int8.py
│ │ ├── onnx_utils.py
│ │ ├── open_source_images.md5
│ │ ├── resnet50.md5
│ │ ├── run_jpeg_test.py
│ │ └── run_onnx_tests.py
│ └── nvRPC/
│ ├── CMakeLists.txt
│ ├── SharedMemoryService/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── client.cc
│ │ └── server.cc
│ ├── StreamingInOrderSendRecv/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── client.cc
│ │ ├── server.cc
│ │ └── test.sh
│ ├── StreamingService/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── client.cc
│ │ ├── common.h
│ │ ├── even-odds.cc
│ │ ├── ping-pong.cc
│ │ └── test.sh
│ └── UnaryService/
│ ├── CMakeLists.txt
│ ├── client.cc
│ └── server.cc
├── jupyter_notebook_config.py
├── models/
│ ├── README.md
│ ├── ResNet-152-deploy.prototxt
│ ├── ResNet-50-deploy.prototxt
│ ├── mps_builder
│ ├── onnx/
│ │ ├── common.py
│ │ ├── mnist-v1.3/
│ │ │ ├── model.onnx
│ │ │ ├── test_data_set_0/
│ │ │ │ ├── input_0.pb
│ │ │ │ └── output_0.pb
│ │ │ ├── test_data_set_1/
│ │ │ │ ├── input_0.pb
│ │ │ │ └── output_0.pb
│ │ │ └── test_data_set_2/
│ │ │ ├── input_0.pb
│ │ │ └── output_0.pb
│ │ └── onnx_builder.py
│ └── setup.py
├── notebooks/
│ ├── Demo Day 1.ipynb
│ ├── Demo Day 2.ipynb
│ ├── Demo Day 3.ipynb
│ ├── Multiple Models.ipynb
│ ├── Quickstart.ipynb
│ └── README.md
├── requirements.txt
└── trtlab/
├── BUILD.bazel
├── CMakeLists.txt
├── core/
│ ├── BUILD.bazel
│ ├── CMakeLists.txt
│ ├── benchmarks/
│ │ ├── CMakeLists.txt
│ │ ├── bench_batcher.cc
│ │ ├── bench_memory.cc
│ │ ├── bench_memory_stack.cc
│ │ ├── bench_pool.cc
│ │ ├── bench_thread_pool.cc
│ │ └── main.cc
│ ├── include/
│ │ └── trtlab/
│ │ └── core/
│ │ ├── affinity.h
│ │ ├── async_compute.h
│ │ ├── batcher.h
│ │ ├── cyclic_buffer.h
│ │ ├── cyclic_windowed_buffer.h
│ │ ├── dispatcher.h
│ │ ├── fiber_group.h
│ │ ├── hybrid_condition.h
│ │ ├── hybrid_mutex.h
│ │ ├── memory/
│ │ │ └── first_touch_allocator.h
│ │ ├── pool.h
│ │ ├── ranges.h
│ │ ├── resources.h
│ │ ├── standard_threads.h
│ │ ├── task_pool.h
│ │ ├── thread_pool.h
│ │ ├── types.h
│ │ ├── userspace_threads.h
│ │ └── utils.h
│ ├── src/
│ │ ├── affinity.cc
│ │ ├── cyclic_buffer.cc
│ │ ├── cyclic_windowed_buffer.cc
│ │ ├── memory/
│ │ │ ├── copy.cc
│ │ │ ├── host_memory.cc
│ │ │ ├── malloc.cc
│ │ │ ├── memory.cc
│ │ │ ├── sysv_allocator.cc
│ │ │ └── tensor_shape.cc
│ │ ├── types.cc
│ │ └── utils.cc
│ └── tests/
│ ├── BUILD.bazel
│ ├── CMakeLists.txt
│ ├── test_affinity.cc
│ ├── test_async.cc
│ ├── test_async_compute.cc
│ ├── test_batcher.cc
│ ├── test_common.cc
│ ├── test_common.h
│ ├── test_cyclic_allocator.cc
│ ├── test_cyclic_windowed_buffer.cc
│ ├── test_foo_memory.cc
│ ├── test_main.cc
│ ├── test_memory.cc
│ ├── test_memory_old.cc
│ ├── test_memory_stack.cc
│ ├── test_pool.cc
│ ├── test_stl_allocator.cc
│ ├── test_sysv_allocator.cc
│ ├── test_tensor.cc
│ ├── test_thread_pool.cc
│ ├── test_transactional_allocator.h
│ └── test_types.cc
├── cuda/
│ ├── BUILD.bazel
│ ├── CMakeLists.txt
│ ├── benchmarks/
│ │ ├── CMakeLists.txt
│ │ ├── bench_cuda_memory.cc
│ │ └── bench_main.cc
│ ├── include/
│ │ └── trtlab/
│ │ └── cuda/
│ │ ├── common.h
│ │ ├── cyclic_windowed_buffer.h
│ │ ├── device_guard.h
│ │ ├── device_info.h
│ │ ├── memory/
│ │ │ ├── cuda_allocators.h
│ │ │ └── device_memory.h
│ │ └── sync.h
│ ├── src/
│ │ ├── copy.cc
│ │ ├── cuda_allocators.cc
│ │ ├── device_guard.cc
│ │ └── device_info.cc
│ └── tests/
│ ├── CMakeLists.txt
│ ├── test_allocators.cc
│ ├── test_device_info.cc
│ ├── test_main.cc
│ └── test_memory.cc
├── memory/
│ ├── CMakeLists.txt
│ ├── benchmarks/
│ │ ├── CMakeLists.txt
│ │ ├── bench_memory.cc
│ │ ├── bench_memory_pool.cc
│ │ └── main.cc
│ ├── cmake/
│ │ ├── configuration.cmake
│ │ └── dependencies.cmake
│ ├── include/
│ │ └── trtlab/
│ │ └── memory/
│ │ ├── align.h
│ │ ├── allocator.h
│ │ ├── allocator_storage.h
│ │ ├── allocator_traits.h
│ │ ├── bfit_allocator.h
│ │ ├── block_allocators.h
│ │ ├── block_arena.h
│ │ ├── block_manager.h
│ │ ├── block_stack.h
│ │ ├── config.h
│ │ ├── debugging.h
│ │ ├── deleter.h
│ │ ├── descriptor.h
│ │ ├── detail/
│ │ │ ├── assert.h
│ │ │ ├── block_list.h
│ │ │ ├── container_node_sizes.h
│ │ │ ├── debug_helpers.h
│ │ │ ├── free_list.h
│ │ │ ├── memory_stack.h
│ │ │ ├── page_info.h
│ │ │ ├── ranges.h
│ │ │ └── utility.h
│ │ ├── error.h
│ │ ├── huge_page_allocator.h
│ │ ├── literals.h
│ │ ├── malloc_allocator.h
│ │ ├── memory_block.h
│ │ ├── memory_pool.h
│ │ ├── memory_resource.h
│ │ ├── memory_type.h
│ │ ├── memory_typed_allocator.h
│ │ ├── posix_aligned_allocator.h
│ │ ├── raii_allocator.h
│ │ ├── smart_ptr.h
│ │ ├── std_allocator.h
│ │ ├── threading.h
│ │ ├── trackers.h
│ │ ├── tracking.h
│ │ ├── transactional_allocator.h
│ │ └── utils.h
│ ├── src/
│ │ ├── CMakeLists.txt
│ │ ├── align.cc
│ │ ├── block_stack.cc
│ │ ├── config.h.in
│ │ ├── descriptor.cc
│ │ ├── detail/
│ │ │ ├── block_list.cc
│ │ │ ├── free_list.cc
│ │ │ ├── free_list_utils.h
│ │ │ └── page_info.c
│ │ ├── error.cc
│ │ ├── ilog2.h
│ │ ├── memory_type.cc
│ │ ├── trackers.cc
│ │ └── utils.cc
│ ├── tests/
│ │ ├── CMakeLists.txt
│ │ ├── test_main.cc
│ │ └── test_memory.cc
│ └── tools/
│ ├── CMakeLists.txt
│ ├── node_size_debugger.cpp
│ ├── node_size_debugger.hpp
│ └── test_types.hpp
├── nvrpc/
│ ├── BUILD.bazel
│ ├── CMakeLists.txt
│ ├── include/
│ │ └── nvrpc/
│ │ ├── client/
│ │ │ ├── base_context.h
│ │ │ ├── client_single_up_multiple_down.h
│ │ │ ├── client_streaming.h
│ │ │ ├── client_streaming_v2.h
│ │ │ ├── client_streaming_v3.h
│ │ │ ├── client_unary.h
│ │ │ ├── client_unary_v2.h
│ │ │ └── executor.h
│ │ ├── context.h
│ │ ├── executor.h
│ │ ├── fiber/
│ │ │ └── executor.h
│ │ ├── interfaces.h
│ │ ├── life_cycle_batching.h
│ │ ├── life_cycle_bidirectional.h
│ │ ├── life_cycle_streaming.h
│ │ ├── life_cycle_unary.h
│ │ ├── rpc.h
│ │ ├── server.h
│ │ └── service.h
│ ├── src/
│ │ ├── client/
│ │ │ └── client_executor.cc
│ │ ├── executor.cc
│ │ └── server.cc
│ └── tests/
│ ├── CMakeLists.txt
│ ├── test_build_client.h
│ ├── test_build_server.h
│ ├── test_pingpong.cc
│ ├── test_pingpong.h
│ ├── test_resources.cc
│ ├── test_resources.h
│ ├── test_server.cc
│ └── testing.proto
├── pybind/
│ ├── CMakeLists.txt
│ └── trtlab/
│ ├── CMakeLists.txt
│ ├── infer.cc
│ ├── utils.cc
│ └── utils.h
└── tensorrt/
├── BUILD.bazel
├── CMakeLists.txt
├── include/
│ └── trtlab/
│ └── tensorrt/
│ ├── allocator.h
│ ├── bindings.h
│ ├── buffers.h
│ ├── common.h
│ ├── execution_context.h
│ ├── infer_bench.h
│ ├── infer_runner.h
│ ├── inference_manager.h
│ ├── model.h
│ ├── runtime.h
│ ├── utils.h
│ └── workspace.h
├── src/
│ ├── allocator.cc
│ ├── bindings.cc
│ ├── buffers.cc
│ ├── execution_context.cc
│ ├── infer_bench.cc
│ ├── inference_manager.cc
│ ├── model.cc
│ ├── runtime.cc
│ ├── utils.cc
│ └── workspace.cc
└── tests/
├── CMakeLists.txt
└── test_buffers.cc
================================================
FILE CONTENTS
================================================
================================================
FILE: .bazelrc
================================================
build --cxxopt=-std=c++1z
build --incompatible_remove_native_http_archive=false
build --incompatible_package_name_is_a_function=false
================================================
FILE: .clang-format
================================================
#BasedOnStyle: Google
Language: Cpp
# BasedOnStyle: LLVM
AccessModifierOffset: -2
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlinesLeft: true
AlignOperands: true
AlignTrailingComments: false
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortIfStatementsOnASingleLine: true
AllowShortFunctionsOnASingleLine: true
AllowShortLoopsOnASingleLine: true
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: true
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
AfterClass: true
AfterControlStatement: true
AfterEnum: true
AfterFunction: true
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: true
AfterUnion: true
BeforeCatch: true
BeforeElse: true
IndentBraces: false
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Custom
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 100
CommentPragmas: '^ IWYU pragma:'
ConstructorInitializerAllOnOneLineOrOnePerLine: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat: false
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
IncludeCategories:
- Regex: '^"(llvm|llvm-c|clang|clang-c)/'
Priority: 2
- Regex: '^(<|"(gtest|isl|json)/)'
Priority: 3
- Regex: '.*'
Priority: 1
IncludeIsMainRegex: '$'
IndentCaseLabels: true
IndentWidth: 4
IndentWrappedFunctionNames: true
JavaScriptQuotes: Leave
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
PenaltyBreakBeforeFirstCallParameter: 19
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 100
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 60
PointerAlignment: Left
ReflowComments: true
SortIncludes: true
SpaceAfterCStyleCast: false
SpaceAfterTemplateKeyword: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: Never
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: false
SpacesInContainerLiterals: false
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
TabWidth: 4
UseTab: Never
================================================
FILE: .dockerignore
================================================
build
*.engine
models
@eaDir
__pycache__
bazel-*
================================================
FILE: .gitmodules
================================================
[submodule "third_party/cpuaff"]
path = third_party/cpuaff
url = https://github.com/dcdillon/cpuaff
[submodule "third_party/gflags"]
path = third_party/gflags
url = https://github.com/gflags/gflags.git
[submodule "third_party/glog"]
path = third_party/glog
url = https://github.com/google/glog.git
[submodule "third_party/grpc"]
path = third_party/grpc
url = https://github.com/grpc/grpc
[submodule "third_party/wait-for-it"]
path = third_party/wait-for-it
url = https://github.com/vishnubob/wait-for-it
[submodule "third_party/benchmark"]
path = third_party/benchmark
url = https://github.com/google/benchmark.git
[submodule "third_party/googletest"]
path = third_party/googletest
url = https://github.com/google/googletest.git
[submodule "third_party/pybind11"]
path = third_party/pybind11
url = https://github.com/pybind/pybind11.git
[submodule "third_party/flatbuffers"]
path = third_party/flatbuffers
url = https://github.com/google/flatbuffers.git
================================================
FILE: BUILD.bazel
================================================
package(default_visibility = ["//visibility:public"])
================================================
FILE: CLA
================================================
The NVIDIA TensorRT Laboratory
Software Grant and Corporate Contributor License Agreement ("Agreement")
Thank you for your interest in the NVIDIA TensorRT Laboratory Project
(the "Project"). In order to clarify the intellectual property license
granted with Contributions from any person or entity, NVIDIA
Corporation (the "Copyright Holders") must have a Contributor License
Agreement (CLA) on file that has been signed by each Contributor,
indicating agreement to the license terms below. This license is
for your protection as a Contributor as well as the protection of the
Project and its users; it does not change your rights to use your own
Contributions for any other purpose.
This version of the Agreement allows an entity (the "Corporation") to
submit Contributions to the Project, to authorize Contributions
submitted by its designated employees to the Project, and to grant
copyright and patent licenses thereto to the Copyright Holders.
If you have not already done so, please complete and sign, then scan and
email a pdf file of this Agreement to rolson@nvidia.com.
Please read this document carefully before signing and keep a copy for
your records.
Corporation name: ________________________________________________
Corporation address: ________________________________________________
________________________________________________
________________________________________________
Point of Contact: ________________________________________________
E-Mail: ________________________________________________
Telephone: _____________________ Fax: _____________________
You accept and agree to the following terms and conditions for Your
present and future Contributions submitted to the Project. In
return, the Copyright Holders shall not use Your Contributions in a way
that is contrary to the public benefit or inconsistent with its nonprofit
status and bylaws in effect at the time of the Contribution. Except
for the license granted herein to the Copyright Holders and recipients of
software distributed by the Copyright Holders, You reserve all right, title,
and interest in and to Your Contributions.
1. Definitions.
"You" (or "Your") shall mean the copyright owner or legal entity
authorized by the copyright owner that is making this Agreement
with the Copyright Holders. For legal entities, the entity making a
Contribution and all other entities that control, are controlled by,
or are under common control with that entity are considered to be a
single Contributor. For the purposes of this definition, "control"
means (i) the power, direct or indirect, to cause the direction or
management of such entity, whether by contract or otherwise, or
(ii) ownership of fifty percent (50%) or more of the outstanding
shares, or (iii) beneficial ownership of such entity.
"Contribution" shall mean the code, documentation or other original
works of authorship expressly identified in Schedule B, as well as
any original work of authorship, including
any modifications or additions to an existing work, that is intentionally
submitted by You to the Copyright Holders for inclusion in, or
documentation of, any of the products owned or managed by the
Copyright Holders (the "Work"). For the purposes of this definition,
"submitted" means any form of electronic, verbal, or written
communication sent to the Copyright Holders or its representatives,
including but not limited to communication on electronic mailing
lists, source code control systems, and issue tracking systems
that are managed by, or on behalf of, the Copyright Holders for the
purpose of discussing and improving the Work, but excluding
communication that is conspicuously marked or otherwise designated
in writing by You as "Not a Contribution."
2. Grant of Copyright License. Subject to the terms and conditions
of this Agreement, You hereby grant to the Copyright Holders and to
recipients of software distributed by the Copyright Holders a
perpetual, worldwide, non-exclusive, no-charge, royalty-free,
irrevocable copyright license to reproduce, prepare derivative works
of, publicly display, publicly perform, sublicense, and distribute
Your Contributions and such derivative works.
3. Grant of Patent License. Subject to the terms and conditions of
this Agreement, You hereby grant to the Copyright Holders and to
recipients of software distributed by the Copyright Holders
a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
irrevocable (except as stated in this section) patent license
to make, have made, use, offer to sell, sell, import, and otherwise
transfer the Work, where such license applies only to those
patent claims licensable by You that are necessarily infringed
by Your Contribution(s) alone or by combination of Your Contribution(s)
with the Work to which such Contribution(s) were submitted.
If any entity institutes patent litigation against You or any
other entity (including a cross-claim or counterclaim in a lawsuit)
alleging that your Contribution, or the Work to which you have
contributed, constitutes direct or contributory patent infringement,
then any patent licenses granted to that entity under this Agreement
for that Contribution or Work shall terminate as of the date such
litigation is filed.
4. You represent that You are legally entitled to grant the above
license. You represent further that each employee of the
Corporation designated on Schedule A below (or in a subsequent
written modification to that Schedule) is authorized to submit
Contributions on behalf of the Corporation.
5. You represent that each of Your Contributions is Your original
creation (see section 7 for submissions on behalf of others).
6. You are not expected to provide support for Your Contributions,
except to the extent You desire to provide support. You may provide
support for free, for a fee, or not at all. Unless required by
applicable law or agreed to in writing, You provide Your
Contributions on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
OF ANY KIND, either express or implied, including, without
limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT,
MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE.
7. Should You wish to submit work that is not Your original creation,
You may submit it to the Copyright Holders separately from any
Contribution, identifying the complete details of its source and
of any license or other restriction (including, but not limited
to, related patents, trademarks, and license agreements) of which
you are personally aware, and conspicuously marking the work as
"Submitted on behalf of a third-party: [named here]".
8. It is your responsibility to notify the Copyright Holders when any change
is required to the list of designated employees authorized to submit
Contributions on behalf of the Corporation, or to the Corporation's
Point of Contact with the Copyright Holders.
Please sign: __________________________________ Date: _______________
Title: __________________________________
Corporation: __________________________________
Schedule A
[Initial list of designated employees. NB: authorization is not
tied to particular Contributions.]
Schedule B
[Identification of optional concurrent software grant. Would be
left blank or omitted if there is no concurrent software grant.]
================================================
FILE: CMakeLists.txt
================================================
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
option (BUILD_DEPENDENCIES "Whether or not a superbuild should be invoked" ON)
option (BUILD_MEMORY "Whether or not to build trtlab/memory" ON)
option (BUILD_CORE "Whether or not to build trtlab/core" ON)
option (BUILD_CUDA "Whether or not to build trtlab/cuda" ON)
option (BUILD_NVRPC "Whether or not to build trtlab/nvrpc" ON)
option (BUILD_TENSORRT "Whether or not to build trtlab/tensorrt" ON)
option (BUILD_PYTHON "Whether or not to build trtlab/pybind" OFF)
option (BUILD_EXAMPLES "Whether or not to build trtlab examples" OFF)
if (BUILD_DEPENDENCIES)
project (trtlab_dependencies NONE)
include (cmake/dependencies.cmake)
return() # stop processing this file further
else()
project (trtlab)
endif()
# CMake path
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
list(APPEND CMAKE_PREFIX_PATH "${PROJECT_SOURCE_DIR}/cmake")
# trtlab
add_subdirectory(trtlab)
if (BUILD_EXAMPLES)
add_subdirectory(examples)
endif()
================================================
FILE: CREDITS.md
================================================
`ThreadPool` class was derived from https://github.com/progschj/ThreadPool
> Copyright (c) 2012 Jakob Progsch, Václav Zeman
>
> This software is provided 'as-is', without any express or implied
> warranty. In no event will the authors be held liable for any damages
> arising from the use of this software.
>
> Permission is granted to anyone to use this software for any purpose,
> including commercial applications, and to alter it and redistribute it
> freely, subject to the following restrictions:
>
> 1. The origin of this software must not be misrepresented; you must not
> claim that you wrote the original software. If you use this software
> in a product, an acknowledgment in the product documentation would be
> appreciated but is not required.
>
> 2. Altered source versions must be plainly marked as such, and must not be
> misrepresented as being the original software.
>
> 3. This notice may not be removed or altered from any source
> distribution.
>
> Modifications to the original work include:
> * Header-only file was split into .h/.cc files
> * Added an extra safety check (lines 30-31) in the construction (.cc file).
> * Added CPU affinity options to the constructor
-----
`cpuaff` is distributed unmodified from the original in [`third-party/cpuaff`]
(https://github.com/dcdillon/cpuaff)
> Copyright (c) 2015, Daniel C. Dillon
> All rights reserved.
>
> Redistribution and use in source and binary forms, with or without
> modification, are permitted provided that the following conditions are met:
>
> * Redistributions of source code must retain the above copyright notice, this
> list of conditions and the following disclaimer.
>
> * Redistributions in binary form must reproduce the above copyright notice,
> this list of conditions and the following disclaimer in the documentation
> and/or other materials provided with the distribution.
>
> * Neither the name of cpuaff nor the names of its
> contributors may be used to endorse or promote products derived from
> this software without specific prior written permission.
>
> THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
> AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
> DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
> FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
> SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
> CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
> OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-----
`wait-for-it.sh` is distributed unmodified from the original in [`third-party/wait-for-it`]
(https://github.com/vishnubob/wait-for-it).
> The MIT License (MIT)
> Copyright (c) 2016 Giles Hall
>
> Permission is hereby granted, free of charge, to any person obtaining a copy of
> this software and associated documentation files (the "Software"), to deal in
> the Software without restriction, including without limitation the rights to
> use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
> of the Software, and to permit persons to whom the Software is furnished to do
> so, subject to the following conditions:
>
> The above copyright notice and this permission notice shall be included in all
> copies or substantial portions of the Software.
>
> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> SOFTWARE.
---
Example gRPC client code was used with modification from the [gRPC project]
(https://github.com/grpc/grpc), specifically the [synchronous c++ client]
(https://github.com/grpc/grpc/blob/master/examples/cpp/helloworld/greeter_client.cc)
> Copyright 2015 gRPC authors.
>
> Licensed under the Apache License, Version 2.0 (the "License");
> you may not use this file except in compliance with the License.
> You may obtain a copy of the License at
>
> http://www.apache.org/licenses/LICENSE-2.0
>
> Unless required by applicable law or agreed to in writing, software
> distributed under the License is distributed on an "AS IS" BASIS,
> WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> See the License for the specific language governing permissions and
> limitations under the License.
---
[moodycamel::ConcurrentQueue](https://github.com/cameron314/concurrentqueue) is
added unmodified to the Docker images and loaded into the `playground` namespace.
> Simplified BSD License:
>
> Copyright (c) 2013-2016, Cameron Desrochers. All rights reserved.
>
> Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
>
> Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
> Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
> THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
---
`transwarp` is used unmodified from the original [bloomen/transwarp](https://github.com/bloomen/transwarp)
> MIT License
>
> Copyright (c) 2018-2019 Christian Blume, Guan Wang
>
> Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
>
> The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
>
> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
---
Caffe ResNet-50 and ResNet-152 models from [KaimingHe/deep-residual-networks]
(https://github.com/KaimingHe/deep-residual-networks) are included without modification.
> The MIT License (MIT)
>
> Copyright (c) 2016 Shaoqing Ren
>
> Permission is hereby granted, free of charge, to any person obtaining a copy
> of this software and associated documentation files (the "Software"), to deal
> in the Software without restriction, including without limitation the rights
> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> copies of the Software, and to permit persons to whom the Software is
> furnished to do so, subject to the following conditions:
>
> The above copyright notice and this permission notice shall be included in all
> copies or substantial portions of the Software.
>
> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> SOFTWARE.
---
================================================
FILE: Dockerfile
================================================
# stage 1 - development container
# holds the core nvidia libraries but does not contain the project source code.
# use this container for development by mapping your source into the running
# container, which persists your source code outside of the container lifecycle
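# a sketch of that workflow (the same commands shown in README.md):
#   docker build -t trtlab:dev --target base .
#   docker run --rm -ti --gpus=all -v $PWD:/work --workdir=/work --net=host trtlab:dev bash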
FROM nvcr.io/nvidia/tensorrt:20.06-py3 AS base
RUN apt update
RUN apt install -y clang-format libssl-dev openssl libz-dev software-properties-common
# remove base cmake
RUN apt remove --purge -y cmake
RUN apt autoremove -y
RUN apt autoclean -y
# install cmake ppa from kitware - https://apt.kitware.com/
RUN apt install -y apt-transport-https ca-certificates gnupg software-properties-common wget
RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | apt-key add -
RUN apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main'
RUN apt update && apt install -y cmake
# then remove FindGTest.cmake installed by cmake
RUN find / -name "FindGTest.cmake" -exec rm -f {} \;
# add cufft and nvml to the container image
RUN apt install -y libcufft-dev-11-0 cuda-nvml-dev-11-0
# override some envs
ENV LD_LIBRARY_PATH=/externals/myelin/x86_64/cuda-11.0/lib:/externals/cudnn/x86_64/8.0/cuda-11.0/lib64:/usr/local/cuda-11.0/targets/x86_64-linux/lib
ENV CCACHE_DIR=/tmp/.ccache
RUN cd /usr/lib/x86_64-linux-gnu && ln -s libnvidia-ml.so.1 libnvidia-ml.so
# stage 2: build the project inside the dev container
FROM base AS trtlab
WORKDIR /work
COPY . .
RUN mkdir build && cd build && cmake .. && make -j
================================================
FILE: LICENSE
================================================
BSD 3-Clause License
Copyright (c) 2018-2019, NVIDIA Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
================================================
FILE: README.md
================================================
# TensorRT Laboratory
The TensorRT Laboratory (trtlab) is a general-purpose set of tools for building custom inference
applications and services.
[Triton](https://github.com/nvidia/triton) is a professional-grade production inference server.
This project is broken into five primary components:
* `memory`, based on [foonathan/memory](https://github.com/foonathan/memory), is designed for
writing custom allocators for both host and GPU memory. Several custom allocators are
included.
* `core` contains host/cpu-side tools for common components such as thread pools, resource
pools, and userspace threading based on Boost fibers.
* `cuda` extends `memory` with a new memory_type for CUDA device memory. All custom allocators
in `memory` can be used with `device_memory`, `device_managed_memory`, or `host_pinned_memory`.
* `nvrpc` is an abstraction layer for building asynchronous microservices. The current
implementation is based on gRPC.
* `tensorrt` provides an opinionated runtime built on the TensorRT API.
## Quickstart
The easiest way to manage the external NVIDIA dependencies is to leverage the containers hosted on
[NGC](https://ngc.nvidia.com). For bare metal installs, use the `Dockerfile` as a template for
which NVIDIA libraries to install.
```
docker build -t trtlab .
```
For development purposes, the following set of commands first builds the base image, then
maps the source code on the host into a running container.
```
docker build -t trtlab:dev --target base .
docker run --rm -ti --gpus=all -v $PWD:/work --workdir=/work --net=host trtlab:dev bash
```
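Inside the development container, the project builds as a standard out-of-source CMake build,
mirroring the steps in the `Dockerfile`. The snippet below is a sketch: `BUILD_EXAMPLES` and
`BUILD_PYTHON` are optional toggles defined in the top-level `CMakeLists.txt` (both `OFF` by
default) and can be omitted.
```
mkdir -p build && cd build
cmake -DBUILD_EXAMPLES=ON -DBUILD_PYTHON=ON ..
make -j
```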
## Copyright and License
This project is released under the [BSD 3-clause license](LICENSE).
## Issues and Contributing
* Please let us know by [filing a new issue](https://github.com/NVIDIA/tensorrt-laboratory/issues/new)
* You can contribute by opening a [pull request](https://help.github.com/articles/using-pull-requests/)
Pull requests with changes of 10 lines or more will require a [Contributor License Agreement](CLA).
================================================
FILE: WORKSPACE
================================================
workspace(name = "com_github_nvidia_trtlab")
load(":bazel/repositories.bzl", "repositories")
repositories()
load ("//bazel:cuda_configure.bzl", "cuda_configure")
cuda_configure(name = "local_config_cuda")
load ("//bazel:tensorrt_configure.bzl", "tensorrt_configure")
tensorrt_configure(name = "local_config_tensorrt")
load("@com_github_grpc_grpc//bazel:grpc_deps.bzl", "grpc_deps")
grpc_deps()
================================================
FILE: bazel/BUILD.bazel
================================================
exports_files(
glob(["*.bzl"]),
visibility = ["//visibility:public"],
)
================================================
FILE: bazel/cuda_configure.bzl
================================================
"""Build rule generator for locally installed CUDA toolkit and cuDNN SDK."""
# src: https://github.com/google/nvidia_libs_test
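# A sketch of overriding the default locations when invoking bazel; the rule reads
# CUDA_PATH and CUDNN_PATH from the environment (the paths below are illustrative):
#
#   CUDA_PATH=/usr/local/cuda-11.0 CUDNN_PATH=/usr/local/cuda-11.0 bazel build //...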
def _get_env_var(repository_ctx, name, default):
if name in repository_ctx.os.environ:
return repository_ctx.os.environ[name]
return default
def _impl(repository_ctx):
cuda_path = _get_env_var(repository_ctx, "CUDA_PATH", "/usr/local/cuda")
cudnn_path = _get_env_var(repository_ctx, "CUDNN_PATH", cuda_path)
print("Using CUDA from %s\n" % cuda_path)
print("Using cuDNN from %s\n" % cudnn_path)
repository_ctx.symlink(cuda_path, "cuda")
repository_ctx.symlink(cudnn_path, "cudnn")
repository_ctx.file("nvcc.sh", """
#! /bin/bash
repo_path=%s
compiler=${CC:+"--compiler-bindir=$CC"}
$repo_path/cuda/bin/nvcc $compiler --compiler-options=-fPIC --include-path=$repo_path $*
""" % repository_ctx.path("."))
repository_ctx.file("BUILD", """
package(default_visibility = ["//visibility:public"])
sh_binary(
name = "nvcc",
srcs = ["nvcc.sh"],
)
# The *_headers cc_library rules below aren't cc_inc_library rules because
# dependent targets would only see the first one.
cc_library(
name = "cuda_headers",
hdrs = glob(
include = ["cuda/include/**/*.h*"],
exclude = ["cuda/include/cudnn.h"]
),
# Allows including CUDA headers with angle brackets.
includes = ["cuda/include"],
)
cc_library(
name = "cuda",
srcs = ["cuda/lib64/stubs/libcuda.so"],
linkopts = ["-ldl"],
)
cc_library(
name = "cuda_runtime",
srcs = ["cuda/lib64/libcudart_static.a"],
deps = [":cuda"],
linkopts = ["-lrt"],
)
cc_library(
name = "curand_static",
srcs = [
"cuda/lib64/libcurand_static.a",
"cuda/lib64/libculibos.a",
],
)
cc_library(
name = "cupti_headers",
hdrs = glob(["cuda/extras/CUPTI/include/**/*.h"]),
# Allows including CUPTI headers with angle brackets.
includes = ["cuda/extras/CUPTI/include"],
)
cc_library(
name = "cupti",
srcs = glob(["cuda/extras/CUPTI/lib64/libcupti.so*"]),
)
cc_library(
name = "cudnn",
srcs = [
"cudnn/lib64/libcudnn_static.a",
"cuda/lib64/libcublas_static.a",
"cuda/lib64/libculibos.a",
],
hdrs = ["cudnn/include/cudnn.h"],
deps = [
":cuda",
":cuda_headers"
],
)
cc_library(
name = "cuda_util",
deps = [":cuda_util_compile"],
)
""")
cuda_configure = repository_rule(
implementation = _impl,
environ = ["CUDA_PATH", "CUDNN_PATH"],
)
================================================
FILE: bazel/repositories.bzl
================================================
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
def repositories():
_maybe(
http_archive,
name = "com_github_antonovvk_bazel_rules",
sha256 = "ba75b07d3fd297375a6688e9a16583eb616e7a74b3d5e8791e7a222cf36ab26e",
strip_prefix = "bazel_rules-98ddd7e4f7c63ea0868f08bcc228463dac2f9f12",
urls = [
"https://mirror.bazel.build/github.com/antonovvk/bazel_rules/archive/98ddd7e4f7c63ea0868f08bcc228463dac2f9f12.tar.gz",
"https://github.com/antonovvk/bazel_rules/archive/98ddd7e4f7c63ea0868f08bcc228463dac2f9f12.tar.gz",
],
)
_maybe(
http_archive,
name = "com_github_gflags_gflags",
sha256 = "6e16c8bc91b1310a44f3965e616383dbda48f83e8c1eaa2370a215057b00cabe",
strip_prefix = "gflags-77592648e3f3be87d6c7123eb81cbad75f9aef5a",
urls = [
"https://mirror.bazel.build/github.com/gflags/gflags/archive/77592648e3f3be87d6c7123eb81cbad75f9aef5a.tar.gz",
"https://github.com/gflags/gflags/archive/77592648e3f3be87d6c7123eb81cbad75f9aef5a.tar.gz",
],
)
_maybe(
http_archive,
name = "com_google_glog",
sha256 = "1ee310e5d0a19b9d584a855000434bb724aa744745d5b8ab1855c85bff8a8e21",
strip_prefix = "glog-028d37889a1e80e8a07da1b8945ac706259e5fd8",
urls = [
"https://mirror.bazel.build/github.com/google/glog/archive/028d37889a1e80e8a07da1b8945ac706259e5fd8.tar.gz",
"https://github.com/google/glog/archive/028d37889a1e80e8a07da1b8945ac706259e5fd8.tar.gz",
],
)
_maybe(
http_archive,
name = "com_google_googletest",
sha256 = "c18f281fd6621bb264570b99860a0241939b4a251c9b1af709b811d33bc63af8",
strip_prefix = "googletest-e3bd4cbeaeef3cee65a68a8bd3c535cb779e9b6d",
urls = [
"https://mirror.bazel.build/github.com/google/googletest/archive/e3bd4cbeaeef3cee65a68a8bd3c535cb779e9b6d.tar.gz",
"https://github.com/google/googletest/archive/e3bd4cbeaeef3cee65a68a8bd3c535cb779e9b6d.tar.gz",
],
)
_maybe(
http_archive,
name = "com_github_grpc_grpc",
strip_prefix = "grpc-1.16.1",
urls = [
"https://github.com/grpc/grpc/archive/v1.16.1.tar.gz",
],
)
def load_trtis():
http_archive(
name = "com_github_nvidia_trtis",
strip_prefix = "tensorrt-inference-server-0.9.0",
urls = [
"https://github.com/NVIDIA/tensorrt-inference-server/archive/v0.9.0.tar.gz",
],
)
def load_benchmark():
http_archive(
name = "com_github_google_benchmark",
sha256 = "f8e525db3c42efc9c7f3bc5176a8fa893a9a9920bbd08cef30fb56a51854d60d",
strip_prefix = "benchmark-1.4.1",
urls = [
"https://github.com/google/benchmark/archive/v1.4.1.tar.gz",
],
)
def _maybe(repo_rule, name, **kwargs):
if name not in native.existing_rules():
repo_rule(name = name, **kwargs)
================================================
FILE: bazel/tensorrt_configure.bzl
================================================
"""Build rule generator for locally installed TensorRT."""
# inspired from: https://github.com/google/nvidia_libs_test
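# A sketch of overriding the default header/library locations when invoking bazel; the
# rule reads TENSORRT_HDRS_PATH and TENSORRT_LIBS_PATH from the environment (the paths
# below are illustrative):
#
#   TENSORRT_HDRS_PATH=/opt/tensorrt/include TENSORRT_LIBS_PATH=/opt/tensorrt/lib bazel build //...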
def _get_env_var(repository_ctx, name, default):
if name in repository_ctx.os.environ:
return repository_ctx.os.environ[name]
return default
def _impl(repository_ctx):
hdrs_path = _get_env_var(repository_ctx, "TENSORRT_HDRS_PATH", "/usr/include/x86_64-linux-gnu")
libs_path = _get_env_var(repository_ctx, "TENSORRT_LIBS_PATH", "/usr/lib/x86_64-linux-gnu")
print("Using TensorRT Headers from %s\n" % hdrs_path)
print("Using TensorRT Libs from %s\n" % libs_path)
repository_ctx.symlink(hdrs_path, "include")
repository_ctx.symlink(libs_path, "libs")
repository_ctx.file("BUILD", """
package(default_visibility = ["//visibility:public"])
# The *_headers cc_library rules below aren't cc_inc_library rules because
# dependent targets would only see the first one.
cc_library(
name = "tensorrt_headers",
hdrs = glob(
include = ["include/Nv*.h"],
),
strip_include_prefix = "include",
# Allows including CUDA headers with angle brackets.
# includes = ["cuda/include"],
)
cc_library(
name = "tensorrt_infer",
srcs = ["libs/libnvinfer.so"],
linkopts = ["-ldl"],
)
""")
tensorrt_configure = repository_rule(
implementation = _impl,
environ = ["TENSORRT_HDRS_PATH", "TENSORRT_LIBS_PATH"],
)
================================================
FILE: build.sh
================================================
#!/bin/bash
#
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
mkdir -p build
cd build
cmake -DCMAKE_BUILD_TYPE=Release ..
cmake -DCMAKE_BUILD_TYPE=Release ..
make -j
cd /work/notebooks
ln -f -s /work/build/tensorrt-laboratory/python/trtlab/trtlab.cpython-35m-x86_64-linux-gnu.so
#make install
================================================
FILE: cmake/FindTensorRT.cmake
================================================
# This module defines the following variables:
#
# ::
#
# TensorRT_INCLUDE_DIRS
# TensorRT_LIBRARIES
# TensorRT_FOUND
#
# ::
#
# TensorRT_VERSION_STRING - version (x.y.z)
# TensorRT_VERSION_MAJOR - major version (x)
# TensorRT_VERSION_MINOR - minor version (y)
# TensorRT_VERSION_PATCH - patch version (z)
#
# Hints
# ^^^^^
# A user may set ``TensorRT_ROOT`` to an installation root to tell this module where to look.
#
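# Example usage (a minimal sketch; ``my_app`` is an illustrative target name):
#
#   find_package(TensorRT REQUIRED)
#   add_executable(my_app main.cc)
#   target_link_libraries(my_app PRIVATE TensorRT::TensorRT)
#
# Alternatively, use ${TensorRT_INCLUDE_DIRS} and ${TensorRT_LIBRARIES} directly.
#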
set(_TensorRT_SEARCHES)
if(TensorRT_ROOT)
set(_TensorRT_SEARCH_ROOT PATHS ${TensorRT_ROOT} NO_DEFAULT_PATH)
list(APPEND _TensorRT_SEARCHES _TensorRT_SEARCH_ROOT)
endif()
# appends some common paths
set(_TensorRT_SEARCH_NORMAL
PATHS "/usr"
)
list(APPEND _TensorRT_SEARCHES _TensorRT_SEARCH_NORMAL)
# Include dir
foreach(search ${_TensorRT_SEARCHES})
find_path(TensorRT_INCLUDE_DIR NAMES NvInfer.h ${${search}} PATH_SUFFIXES include)
endforeach()
if(NOT TensorRT_LIBRARY)
foreach(search ${_TensorRT_SEARCHES})
find_library(TensorRT_LIBRARY NAMES nvinfer ${${search}} PATH_SUFFIXES lib)
endforeach()
endif()
mark_as_advanced(TensorRT_INCLUDE_DIR)
if(TensorRT_INCLUDE_DIR AND EXISTS "${TensorRT_INCLUDE_DIR}/NvInfer.h")
file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MAJOR REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$")
file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MINOR REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$")
file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInfer.h" TensorRT_PATCH REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$")
string(REGEX REPLACE "^#define NV_TENSORRT_MAJOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MAJOR "${TensorRT_MAJOR}")
string(REGEX REPLACE "^#define NV_TENSORRT_MINOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MINOR "${TensorRT_MINOR}")
string(REGEX REPLACE "^#define NV_TENSORRT_PATCH ([0-9]+).*$" "\\1" TensorRT_VERSION_PATCH "${TensorRT_PATCH}")
set(TensorRT_VERSION_STRING "${TensorRT_VERSION_MAJOR}.${TensorRT_VERSION_MINOR}.${TensorRT_VERSION_PATCH}")
endif()
include(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(TensorRT REQUIRED_VARS TensorRT_LIBRARY TensorRT_INCLUDE_DIR VERSION_VAR TensorRT_VERSION_STRING)
if(TensorRT_FOUND)
set(TensorRT_INCLUDE_DIRS ${TensorRT_INCLUDE_DIR})
if(NOT TensorRT_LIBRARIES)
set(TensorRT_LIBRARIES ${TensorRT_LIBRARY})
endif()
if(NOT TARGET TensorRT::TensorRT)
add_library(TensorRT::TensorRT UNKNOWN IMPORTED)
set_target_properties(TensorRT::TensorRT PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${TensorRT_INCLUDE_DIRS}")
set_property(TARGET TensorRT::TensorRT APPEND PROPERTY IMPORTED_LOCATION "${TensorRT_LIBRARY}")
endif()
endif()
================================================
FILE: cmake/Findcpuaff.cmake
================================================
# This module defines the following variables:
#
# ::
#
# CPUAFF_INCLUDE_DIRS
# CPUAFF_FOUND
#
# ::
#
# Hints
# ^^^^^
# A user may set ``CPUAFF_ROOT`` to an installation root to tell this module where to look.
#
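# Example usage (a minimal sketch; ``my_app`` is an illustrative target name):
#
#   find_package(cpuaff)
#   if(CPUAFF_FOUND)
#     target_link_libraries(my_app PRIVATE cpuaff)
#   endif()
#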
set(CPUAFF_FOUND FALSE)
set(_CPUAFF_SEARCHES)
if(CPUAFF_ROOT)
set(_CPUAFF_SEARCH_ROOT PATHS ${CPUAFF_ROOT} NO_DEFAULT_PATH)
list(APPEND _CPUAFF_SEARCHES _CPUAFF_SEARCH_ROOT)
else()
list(APPEND _CPUAFF_SEARCHES "/usr")
list(APPEND _CPUAFF_SEARCHES "/usr/local")
endif()
# Include dir
foreach(search ${_CPUAFF_SEARCHES})
find_path(
CPUAFF_INCLUDE_DIR
NAMES cpuaff/cpuaff.hpp
PATHS ${CPUAFF_ROOT}
PATH_SUFFIXES include)
message(STATUS "cpuaff: ${CPUAFF_INCLUDE_DIR}")
endforeach()
mark_as_advanced(CPUAFF_INCLUDE_DIR)
if(CPUAFF_INCLUDE_DIR AND EXISTS "${CPUAFF_INCLUDE_DIR}/cpuaff/cpuaff.hpp")
set(CPUAFF_FOUND True)
add_library(cpuaff INTERFACE)
target_include_directories(cpuaff INTERFACE ${CPUAFF_INCLUDE_DIR})
endif()
================================================
FILE: cmake/GRPCGenerateCPP.cmake
================================================
find_package(gRPC REQUIRED COMPONENTS grpc_cpp_plugin)
set(_gRPC_CPP_PLUGIN_EXECUTABLE $<TARGET_FILE:gRPC::grpc_cpp_plugin>)
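# Generates <name>.grpc.pb.cc/.h for each listed .proto file. Example usage (a sketch;
# the proto file and target name are illustrative, and the matching protobuf message
# sources are generated separately via protobuf_generate_cpp):
#
#   PROTOBUF_GENERATE_GRPC_CPP(GRPC_SRCS GRPC_HDRS echo.proto)
#   add_library(echo_grpc_proto ${GRPC_SRCS} ${GRPC_HDRS})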
function(PROTOBUF_GENERATE_GRPC_CPP SRCS HDRS)
cmake_parse_arguments(protobuf "" "EXPORT_MACRO;DESCRIPTORS" "" ${ARGN})
set(PROTO_FILES "${protobuf_UNPARSED_ARGUMENTS}")
if(NOT PROTO_FILES)
message(SEND_ERROR "Error: PROTOBUF_GENERATE_GRPC_CPP() called without any proto files")
return()
endif()
if(PROTOBUF_GENERATE_CPP_APPEND_PATH) # This variable is common for all types of output.
# Create an include path for each file specified
foreach(FIL ${PROTO_FILES})
get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
get_filename_component(ABS_PATH ${ABS_FIL} PATH)
list(FIND _protobuf_include_path ${ABS_PATH} _contains_already)
if(${_contains_already} EQUAL -1)
list(APPEND _protobuf_include_path -I ${ABS_PATH})
endif()
endforeach()
else()
set(_protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR})
endif()
if(DEFINED PROTOBUF_IMPORT_DIRS AND NOT DEFINED Protobuf_IMPORT_DIRS)
set(Protobuf_IMPORT_DIRS "${PROTOBUF_IMPORT_DIRS}")
endif()
if(DEFINED Protobuf_IMPORT_DIRS)
foreach(DIR ${Protobuf_IMPORT_DIRS})
get_filename_component(ABS_PATH ${DIR} ABSOLUTE)
list(FIND _protobuf_include_path ${ABS_PATH} _contains_already)
if(${_contains_already} EQUAL -1)
list(APPEND _protobuf_include_path -I ${ABS_PATH})
endif()
endforeach()
endif()
set(${SRCS})
set(${HDRS})
foreach(FIL ${PROTO_FILES})
get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
get_filename_component(FIL_WE ${FIL} NAME_WE)
if(NOT PROTOBUF_GENERATE_CPP_APPEND_PATH)
get_filename_component(FIL_DIR ${FIL} DIRECTORY)
if(FIL_DIR)
set(FIL_WE "${FIL_DIR}/${FIL_WE}")
endif()
endif()
set(_protobuf_grpc_src "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.grpc.pb.cc")
set(_protobuf_grpc_hdr "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.grpc.pb.h")
list(APPEND ${SRCS} "${_protobuf_grpc_src}")
list(APPEND ${HDRS} "${_protobuf_grpc_hdr}")
add_custom_command(
OUTPUT "${_protobuf_grpc_src}"
"${_protobuf_grpc_hdr}"
COMMAND ${Protobuf_PROTOC_EXECUTABLE}
--grpc_out=${CMAKE_CURRENT_BINARY_DIR}
--plugin=protoc-gen-grpc=${_gRPC_CPP_PLUGIN_EXECUTABLE}
${_protobuf_include_path} ${ABS_FIL}
DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE}
COMMENT "Running gRPC C++ protocol buffer compiler on ${FIL}"
VERBATIM)
endforeach()
set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE)
set(${SRCS} ${${SRCS}} PARENT_SCOPE)
set(${HDRS} ${${HDRS}} PARENT_SCOPE)
endfunction()
================================================
FILE: cmake/GRPCGenerateCPPLikeBazel.cmake
================================================
find_program(GRPC_CPP_PLUGIN grpc_cpp_plugin) # Get full path to plugin
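# Like PROTOBUF_GENERATE_GRPC_CPP, but output paths are kept relative to CMAKE_SOURCE_DIR
# so generated includes mirror the Bazel layout. Example usage (a sketch; the proto file
# and target name are illustrative):
#
#   PROTOBUF_GENERATE_GRPC_CPP_LIKE_BAZEL(GRPC_SRCS GRPC_HDRS echo.proto)
#   add_library(echo_grpc_proto ${GRPC_SRCS} ${GRPC_HDRS})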
function(PROTOBUF_GENERATE_GRPC_CPP_LIKE_BAZEL SRCS HDRS)
cmake_parse_arguments(protobuf "" "EXPORT_MACRO;DESCRIPTORS" "" ${ARGN})
set(PROTO_FILES "${protobuf_UNPARSED_ARGUMENTS}")
if(NOT PROTO_FILES)
message(SEND_ERROR "Error: PROTOBUF_GENERATE_GRPC_CPP() called without any proto files")
return()
endif()
if(protobuf_EXPORT_MACRO)
set(DLL_EXPORT_DECL "dllexport_decl=${protobuf_EXPORT_MACRO}:")
endif()
get_filename_component(ABS_PROTO_PATH ${CMAKE_SOURCE_DIR} ABSOLUTE)
set(EXTRA_ARGS "--proto_path=${ABS_PROTO_PATH}")
file(RELATIVE_PATH Protobuf_PRE_IMPORT_DIRS ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
if(PROTOBUF_GENERATE_CPP_APPEND_PATH) # This variable is common for all types of output.
# Create an include path for each file specified
foreach(FIL ${PROTO_FILES})
get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
get_filename_component(ABS_PATH ${ABS_FIL} PATH)
list(FIND _protobuf_include_path ${ABS_PATH} _contains_already)
if(${_contains_already} EQUAL -1)
list(APPEND _protobuf_include_path -I ${ABS_PATH})
endif()
endforeach()
else()
set(_protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR})
endif()
if(DEFINED PROTOBUF_IMPORT_DIRS AND NOT DEFINED Protobuf_IMPORT_DIRS)
set(Protobuf_IMPORT_DIRS "${PROTOBUF_IMPORT_DIRS}")
endif()
if(DEFINED Protobuf_IMPORT_DIRS)
foreach(DIR ${Protobuf_IMPORT_DIRS})
get_filename_component(ABS_PATH ${DIR} ABSOLUTE)
list(FIND _protobuf_include_path ${ABS_PATH} _contains_already)
if(${_contains_already} EQUAL -1)
list(APPEND _protobuf_include_path -I ${ABS_PATH})
endif()
endforeach()
endif()
set(${SRCS})
set(${HDRS})
foreach(FIL ${PROTO_FILES})
message(STATUS "grpc_cpp_proto: ${FIL}")
get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
get_filename_component(FIL_WE ${FIL} NAME_WE)
message(STATUS "grpc_cpp_proto_abs: ${ABS_FIL}")
if(NOT PROTOBUF_GENERATE_CPP_APPEND_PATH)
get_filename_component(FIL_DIR ${FIL} DIRECTORY)
if(FIL_DIR)
set(FIL_WE "${FIL_DIR}/${FIL_WE}")
endif()
endif()
if(Protobuf_PRE_IMPORT_DIRS)
set(_protobuf_protoc_src "${CMAKE_CURRENT_BINARY_DIR}/${Protobuf_PRE_IMPORT_DIRS}/${FIL_WE}.grpc.pb.cc")
set(_protobuf_protoc_hdr "${CMAKE_CURRENT_BINARY_DIR}/${Protobuf_PRE_IMPORT_DIRS}/${FIL_WE}.grpc.pb.h")
else()
set(_protobuf_protoc_src "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.grpc.pb.cc")
set(_protobuf_protoc_hdr "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.grpc.pb.h")
endif()
message(STATUS "grpc_cpp_src: ${_protobuf_protoc_src}")
list(APPEND ${SRCS} "${_protobuf_protoc_src}")
list(APPEND ${HDRS} "${_protobuf_protoc_hdr}")
add_custom_command(
OUTPUT "${_protobuf_protoc_src}"
"${_protobuf_protoc_hdr}"
COMMAND ${Protobuf_PROTOC_EXECUTABLE}
${EXTRA_ARGS}
"--grpc_out=${CMAKE_CURRENT_BINARY_DIR}"
"--plugin=protoc-gen-grpc=${GRPC_CPP_PLUGIN}"
${_protobuf_protoc_flags}
${_protobuf_include_path} ${ABS_FIL}
DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE}
COMMENT "Running gRPC C++ protocol buffer compiler on ${FIL}"
VERBATIM)
endforeach()
set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE)
set(${SRCS} ${${SRCS}} PARENT_SCOPE)
set(${HDRS} ${${HDRS}} PARENT_SCOPE)
endfunction()
================================================
FILE: cmake/LibFindMacros.cmake
================================================
# Version 2.2
# Public Domain, originally written by Lasse Kärkkäinen <tronic>
# Maintained at https://github.com/Tronic/cmake-modules
# Please send your improvements as pull requests on Github.
# Find another package and make it a dependency of the current package.
# This also automatically forwards the "REQUIRED" argument.
# Usage: libfind_package(<prefix> <another package> [extra args to find_package])
macro (libfind_package PREFIX PKG)
set(${PREFIX}_args ${PKG} ${ARGN})
if (${PREFIX}_FIND_REQUIRED)
set(${PREFIX}_args ${${PREFIX}_args} REQUIRED)
endif()
find_package(${${PREFIX}_args})
set(${PREFIX}_DEPENDENCIES ${${PREFIX}_DEPENDENCIES};${PKG})
unset(${PREFIX}_args)
endmacro()
# A simple wrapper to make pkg-config searches a bit easier.
# Works the same as CMake's internal pkg_check_modules but is always quiet.
macro (libfind_pkg_check_modules)
find_package(PkgConfig QUIET)
if (PKG_CONFIG_FOUND)
pkg_check_modules(${ARGN} QUIET)
endif()
endmacro()
# Avoid useless copy&pasta by doing what most simple libraries do anyway:
# pkg-config, find headers, find library.
# Usage: libfind_pkg_detect(<prefix> <pkg-config args> FIND_PATH <name> [other args] FIND_LIBRARY <name> [other args])
# E.g. libfind_pkg_detect(SDL2 sdl2 FIND_PATH SDL.h PATH_SUFFIXES SDL2 FIND_LIBRARY SDL2)
function (libfind_pkg_detect PREFIX)
# Parse arguments
set(argname pkgargs)
foreach (i ${ARGN})
if ("${i}" STREQUAL "FIND_PATH")
set(argname pathargs)
elseif ("${i}" STREQUAL "FIND_LIBRARY")
set(argname libraryargs)
else()
set(${argname} ${${argname}} ${i})
endif()
endforeach()
if (NOT pkgargs)
message(FATAL_ERROR "libfind_pkg_detect requires at least a pkg_config package name to be passed.")
endif()
# Find library
libfind_pkg_check_modules(${PREFIX}_PKGCONF ${pkgargs})
if (pathargs)
find_path(${PREFIX}_INCLUDE_DIR NAMES ${pathargs} HINTS ${${PREFIX}_PKGCONF_INCLUDE_DIRS})
endif()
if (libraryargs)
find_library(${PREFIX}_LIBRARY NAMES ${libraryargs} HINTS ${${PREFIX}_PKGCONF_LIBRARY_DIRS})
endif()
endfunction()
# Extracts a version #define from a version.h file, output stored to <PREFIX>_VERSION.
# Usage: libfind_version_header(Foobar foobar/version.h FOOBAR_VERSION_STR)
# Fourth argument "QUIET" may be used for silently testing different define names.
# This function does nothing if the version variable is already defined.
function (libfind_version_header PREFIX VERSION_H DEFINE_NAME)
# Skip processing if we already have a version or if the include dir was not found
if (${PREFIX}_VERSION OR NOT ${PREFIX}_INCLUDE_DIR)
return()
endif()
set(quiet ${${PREFIX}_FIND_QUIETLY})
# Process optional arguments
foreach(arg ${ARGN})
if (arg STREQUAL "QUIET")
set(quiet TRUE)
else()
message(AUTHOR_WARNING "Unknown argument ${arg} to libfind_version_header ignored.")
endif()
endforeach()
# Read the header and parse for version number
set(filename "${${PREFIX}_INCLUDE_DIR}/${VERSION_H}")
if (NOT EXISTS ${filename})
if (NOT quiet)
message(AUTHOR_WARNING "Unable to find ${${PREFIX}_INCLUDE_DIR}/${VERSION_H}")
endif()
return()
endif()
file(READ "${filename}" header)
string(REGEX REPLACE ".*#[ \t]*define[ \t]*${DEFINE_NAME}[ \t]*\"([^\n]*)\".*" "\\1" match "${header}")
# No regex match?
if (match STREQUAL header)
if (NOT quiet)
message(AUTHOR_WARNING "Unable to find \#define ${DEFINE_NAME} \"<version>\" from ${${PREFIX}_INCLUDE_DIR}/${VERSION_H}")
endif()
return()
endif()
# Export the version string
set(${PREFIX}_VERSION "${match}" PARENT_SCOPE)
endfunction()
# Do the final processing once the paths have been detected.
# If include dirs are needed, ${PREFIX}_PROCESS_INCLUDES should be set to contain
# all the variables, each of which contain one include directory.
# Ditto for ${PREFIX}_PROCESS_LIBS and library files.
# Will set ${PREFIX}_FOUND, ${PREFIX}_INCLUDE_DIRS and ${PREFIX}_LIBRARIES.
# Also handles errors in case library detection was required, etc.
function (libfind_process PREFIX)
# Skip processing if already processed during this configuration run
if (${PREFIX}_FOUND)
return()
endif()
set(found TRUE) # Start with the assumption that the package was found
# Did we find any files? Did we miss includes? These are for formatting better error messages.
set(some_files FALSE)
set(missing_headers FALSE)
# Shorthands for some variables that we need often
set(quiet ${${PREFIX}_FIND_QUIETLY})
set(required ${${PREFIX}_FIND_REQUIRED})
set(exactver ${${PREFIX}_FIND_VERSION_EXACT})
set(findver "${${PREFIX}_FIND_VERSION}")
set(version "${${PREFIX}_VERSION}")
# Lists of config option names (all, includes, libs)
unset(configopts)
set(includeopts ${${PREFIX}_PROCESS_INCLUDES})
set(libraryopts ${${PREFIX}_PROCESS_LIBS})
# Process deps to add to
foreach (i ${PREFIX} ${${PREFIX}_DEPENDENCIES})
if (DEFINED ${i}_INCLUDE_OPTS OR DEFINED ${i}_LIBRARY_OPTS)
# The package seems to export option lists that we can use, woohoo!
list(APPEND includeopts ${${i}_INCLUDE_OPTS})
list(APPEND libraryopts ${${i}_LIBRARY_OPTS})
else()
# If plural forms don't exist or they equal singular forms
if ((NOT DEFINED ${i}_INCLUDE_DIRS AND NOT DEFINED ${i}_LIBRARIES) OR
(${i}_INCLUDE_DIR STREQUAL ${i}_INCLUDE_DIRS AND ${i}_LIBRARY STREQUAL ${i}_LIBRARIES))
# Singular forms can be used
if (DEFINED ${i}_INCLUDE_DIR)
list(APPEND includeopts ${i}_INCLUDE_DIR)
endif()
if (DEFINED ${i}_LIBRARY)
list(APPEND libraryopts ${i}_LIBRARY)
endif()
else()
# Oh no, we don't know the option names
message(FATAL_ERROR "We couldn't determine config variable names for ${i} includes and libs. Aieeh!")
endif()
endif()
endforeach()
if (includeopts)
list(REMOVE_DUPLICATES includeopts)
endif()
if (libraryopts)
list(REMOVE_DUPLICATES libraryopts)
endif()
string(REGEX REPLACE ".*[ ;]([^ ;]*(_INCLUDE_DIRS|_LIBRARIES))" "\\1" tmp "${includeopts} ${libraryopts}")
if (NOT tmp STREQUAL "${includeopts} ${libraryopts}")
message(AUTHOR_WARNING "Plural form ${tmp} found in config options of ${PREFIX}. This works as before but is now deprecated. Please only use singular forms INCLUDE_DIR and LIBRARY, and update your find scripts for LibFindMacros > 2.0 automatic dependency system (most often you can simply remove the PROCESS variables entirely).")
endif()
# Include/library names separated by spaces (notice: not CMake lists)
unset(includes)
unset(libs)
# Process all includes and set found false if any are missing
foreach (i ${includeopts})
list(APPEND configopts ${i})
if (NOT "${${i}}" STREQUAL "${i}-NOTFOUND")
list(APPEND includes "${${i}}")
else()
set(found FALSE)
set(missing_headers TRUE)
endif()
endforeach()
# Process all libraries and set found false if any are missing
foreach (i ${libraryopts})
list(APPEND configopts ${i})
if (NOT "${${i}}" STREQUAL "${i}-NOTFOUND")
list(APPEND libs "${${i}}")
else()
set (found FALSE)
endif()
endforeach()
# Version checks
if (found AND findver)
if (NOT version)
message(WARNING "The find module for ${PREFIX} does not provide version information, so we'll just assume that it is OK. Please fix the module or remove package version requirements to get rid of this warning.")
elseif (version VERSION_LESS findver OR (exactver AND NOT version VERSION_EQUAL findver))
set(found FALSE)
set(version_unsuitable TRUE)
endif()
endif()
# If all-OK, hide all config options, export variables, print status and exit
if (found)
foreach (i ${configopts})
mark_as_advanced(${i})
endforeach()
if (NOT quiet)
message(STATUS "Found ${PREFIX} ${${PREFIX}_VERSION}")
if (LIBFIND_DEBUG)
message(STATUS " ${PREFIX}_DEPENDENCIES=${${PREFIX}_DEPENDENCIES}")
message(STATUS " ${PREFIX}_INCLUDE_OPTS=${includeopts}")
message(STATUS " ${PREFIX}_INCLUDE_DIRS=${includes}")
message(STATUS " ${PREFIX}_LIBRARY_OPTS=${libraryopts}")
message(STATUS " ${PREFIX}_LIBRARIES=${libs}")
endif()
set (${PREFIX}_INCLUDE_OPTS ${includeopts} PARENT_SCOPE)
set (${PREFIX}_LIBRARY_OPTS ${libraryopts} PARENT_SCOPE)
set (${PREFIX}_INCLUDE_DIRS ${includes} PARENT_SCOPE)
set (${PREFIX}_LIBRARIES ${libs} PARENT_SCOPE)
set (${PREFIX}_FOUND TRUE PARENT_SCOPE)
endif()
return()
endif()
# Format messages for debug info and the type of error
set(vars "Relevant CMake configuration variables:\n")
foreach (i ${configopts})
mark_as_advanced(CLEAR ${i})
set(val ${${i}})
if ("${val}" STREQUAL "${i}-NOTFOUND")
set (val "<not found>")
elseif (val AND NOT EXISTS ${val})
set (val "${val} (does not exist)")
else()
set(some_files TRUE)
endif()
set(vars "${vars} ${i}=${val}\n")
endforeach()
set(vars "${vars}You may use CMake GUI, cmake -D or ccmake to modify the values. Delete CMakeCache.txt to discard all values and force full re-detection if necessary.\n")
if (version_unsuitable)
set(msg "${PREFIX} ${${PREFIX}_VERSION} was found but")
if (exactver)
set(msg "${msg} only version ${findver} is acceptable.")
else()
set(msg "${msg} version ${findver} is the minimum requirement.")
endif()
else()
if (missing_headers)
set(msg "We could not find development headers for ${PREFIX}. Do you have the necessary dev package installed?")
elseif (some_files)
set(msg "We only found some files of ${PREFIX}, not all of them. Perhaps your installation is incomplete or maybe we just didn't look in the right place?")
if(findver)
set(msg "${msg} This could also be caused by incompatible version (if it helps, at least ${PREFIX} ${findver} should work).")
endif()
else()
set(msg "We were unable to find package ${PREFIX}.")
endif()
endif()
# Fatal error out if REQUIRED
if (required)
set(msg "REQUIRED PACKAGE NOT FOUND\n${msg} This package is REQUIRED and you need to install it or adjust CMake configuration in order to continue building ${CMAKE_PROJECT_NAME}.")
message(FATAL_ERROR "${msg}\n${vars}")
endif()
# Otherwise just print a nasty warning
if (NOT quiet)
message(WARNING "WARNING: MISSING PACKAGE\n${msg} This package is NOT REQUIRED and you may ignore this warning but by doing so you may miss some functionality of ${CMAKE_PROJECT_NAME}. \n${vars}")
endif()
endfunction()
================================================
FILE: cmake/ProtobufGenerateCPPLikeBazel.cmake
================================================
function(PROTOBUF_GENERATE_CPP_LIKE_BAZEL SRCS HDRS)
cmake_parse_arguments(protobuf "" "EXPORT_MACRO;DESCRIPTORS" "" ${ARGN})
set(PROTO_FILES "${protobuf_UNPARSED_ARGUMENTS}")
if(NOT PROTO_FILES)
message(SEND_ERROR "Error: PROTOBUF_GENERATE_CPP() called without any proto files")
return()
endif()
if(protobuf_EXPORT_MACRO)
set(DLL_EXPORT_DECL "dllexport_decl=${protobuf_EXPORT_MACRO}:")
endif()
get_filename_component(ABS_PROTO_PATH ${CMAKE_SOURCE_DIR} ABSOLUTE)
set(EXTRA_ARGS "--proto_path=${ABS_PROTO_PATH}")
file(RELATIVE_PATH Protobuf_PRE_IMPORT_DIRS ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
if(PROTOBUF_GENERATE_CPP_APPEND_PATH)
# Create an include path for each file specified
foreach(FIL ${PROTO_FILES})
get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
get_filename_component(ABS_PATH ${ABS_FIL} PATH)
list(FIND _protobuf_include_path ${ABS_PATH} _contains_already)
if(${_contains_already} EQUAL -1)
list(APPEND _protobuf_include_path -I ${ABS_PATH})
endif()
endforeach()
else()
set(_protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR})
endif()
if(DEFINED PROTOBUF_IMPORT_DIRS AND NOT DEFINED Protobuf_IMPORT_DIRS)
set(Protobuf_IMPORT_DIRS "${PROTOBUF_IMPORT_DIRS}")
endif()
if(DEFINED Protobuf_IMPORT_DIRS)
foreach(DIR ${Protobuf_IMPORT_DIRS})
get_filename_component(ABS_PATH ${DIR} ABSOLUTE)
list(FIND _protobuf_include_path ${ABS_PATH} _contains_already)
if(${_contains_already} EQUAL -1)
list(APPEND _protobuf_include_path -I ${ABS_PATH})
endif()
endforeach()
endif()
set(${SRCS})
set(${HDRS})
if (protobuf_DESCRIPTORS)
set(${protobuf_DESCRIPTORS})
endif()
foreach(FIL ${PROTO_FILES})
get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
get_filename_component(FIL_WE ${FIL} NAME_WE)
if(NOT PROTOBUF_GENERATE_CPP_APPEND_PATH)
get_filename_component(FIL_DIR ${FIL} DIRECTORY)
if(FIL_DIR)
set(FIL_WE "${FIL_DIR}/${FIL_WE}")
endif()
endif()
if(Protobuf_PRE_IMPORT_DIRS)
set(_protobuf_protoc_src "${CMAKE_CURRENT_BINARY_DIR}/${Protobuf_PRE_IMPORT_DIRS}/${FIL_WE}.pb.cc")
set(_protobuf_protoc_hdr "${CMAKE_CURRENT_BINARY_DIR}/${Protobuf_PRE_IMPORT_DIRS}/${FIL_WE}.pb.h")
else()
set(_protobuf_protoc_src "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc")
set(_protobuf_protoc_hdr "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h")
endif()
list(APPEND ${SRCS} "${_protobuf_protoc_src}")
list(APPEND ${HDRS} "${_protobuf_protoc_hdr}")
if(protobuf_DESCRIPTORS)
set(_protobuf_protoc_desc "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.desc")
set(_protobuf_protoc_flags "--descriptor_set_out=${_protobuf_protoc_desc}")
list(APPEND ${protobuf_DESCRIPTORS} "${_protobuf_protoc_desc}")
else()
set(_protobuf_protoc_desc "")
set(_protobuf_protoc_flags "")
endif()
add_custom_command(
OUTPUT "${_protobuf_protoc_src}"
"${_protobuf_protoc_hdr}"
${_protobuf_protoc_desc}
COMMAND protobuf::protoc
${EXTRA_ARGS}
"--cpp_out=${DLL_EXPORT_DECL}${CMAKE_CURRENT_BINARY_DIR}"
${_protobuf_protoc_flags}
${_protobuf_include_path} ${ABS_FIL}
DEPENDS ${ABS_FIL} protobuf::protoc
COMMENT "Running C++ protocol buffer compiler on ${FIL}"
VERBATIM )
endforeach()
set(${SRCS} "${${SRCS}}" PARENT_SCOPE)
set(${HDRS} "${${HDRS}}" PARENT_SCOPE)
if(protobuf_DESCRIPTORS)
set(${protobuf_DESCRIPTORS} "${${protobuf_DESCRIPTORS}}" PARENT_SCOPE)
endif()
endfunction()
================================================
FILE: cmake/dependencies.cmake
================================================
include (ExternalProject)
set (DEPENDENCIES)
set (EXTRA_CMAKE_ARGS)
# trtlab external dependencies
list (APPEND DEPENDENCIES boost dlpack gflags glog benchmark googletest cpuaff jemalloc)
list (APPEND DEPENDENCIES grpc-repo protobuf c-ares grpc cub cnpy)
# note on ubuntu 18.04, you need
# apt install libz-dev libssl-dev
# customize the folder for external projects
# download, source and builds for dependencies
# will be in <build-dir>/Dependencies
set_property (DIRECTORY PROPERTY EP_BASE Dependencies)
# all dependencies will be installed here
# typical directories: bin, include and lib
set (BUILD_ROOT ${CMAKE_CURRENT_BINARY_DIR}/Dependencies/Build)
set (SOURCE_ROOT ${CMAKE_CURRENT_BINARY_DIR}/Dependencies/Source)
set (INSTALL_ROOT ${CMAKE_CURRENT_BINARY_DIR}/local)
# set cmake search paths to pick up installed .cmake files
list(INSERT CMAKE_MODULE_PATH 0 "${INSTALL_ROOT}/lib/cmake")
list(INSERT CMAKE_PREFIX_PATH 0 "${INSTALL_ROOT}/lib/cmake")
# cmake config args forwarded to trtlab
list(APPEND EXTRA_CMAKE_ARGS
-DCMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}
-DCMAKE_MODULE_PATH=${CMAKE_MODULE_PATH}
# -DBoost_VERBOSE=ON
-DBoost_USE_STATIC_LIBS=ON
-DCPUAFF_ROOT=${INSTALL_ROOT}
-DJEMALLOC_STATIC_LIBRARIES=${INSTALL_ROOT}/lib/libjemalloc_pic.a
-DCUB_INCLUDE_DIR=${SOURCE_ROOT}/cub
-DINSTALL_ROOT=${INSTALL_ROOT}
)
# short-cut to dependencies build path
set (BUILD_ROOT ${CMAKE_CURRENT_BINARY_DIR}/Dependencies/Build)
# Boost
# =====
# - Use static linking to avoid issues with system-wide installations of Boost.
# - Use numa=on to ensure the numa component of fiber gets built
set(BOOST_COMPONENTS "context,fiber,filesystem")
ExternalProject_Add (boost
URL https://dl.bintray.com/boostorg/release/1.72.0/source/boost_1_72_0.tar.gz
URL_HASH SHA256=c66e88d5786f2ca4dbebb14e06b566fb642a1a6947ad8cc9091f9f445134143f
CONFIGURE_COMMAND ./bootstrap.sh --prefix=${INSTALL_ROOT} --with-libraries=${BOOST_COMPONENTS} numa=on
BUILD_COMMAND ./b2 link=static cxxflags=-fPIC cflags=-fPIC cxxflags="-std=c++14" numa=on
--build-dir=${BUILD_ROOT}/boost --stagedir=${BUILD_ROOT}/boost
BUILD_IN_SOURCE 1
INSTALL_COMMAND ./b2 install numa=on
)
# DLPack
# ======
ExternalProject_Add(dlpack
GIT_REPOSITORY "https://github.com/dmlc/dlpack.git"
GIT_TAG "master"
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${INSTALL_ROOT}
)
# gflags
# ======
# config, build and install to INSTALL_ROOT
ExternalProject_Add(gflags
GIT_REPOSITORY "https://github.com/gflags/gflags.git"
GIT_TAG "v2.2.2"
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${INSTALL_ROOT}
-DBUILD_SHARED_LIBS=ON
-DBUILD_STATIC_LIBS=ON
-DBUILD_PACKAGING=OFF
-DBUILD_TESTING=OFF
-DBUILD_CONFIG_TESTS=OFF
-DINSTALL_HEADERS=ON
-DBUILD_gflags_LIB=OFF
-DBUILD_gflags_nothreads_LIB=ON
-DGFLAGS_NAMESPACE=google
)
# glog
# ====
# - link against shared
# - todo: compile with -DWITH_GFLAGS=OFF and remove gflags dependency
ExternalProject_Add(glog
DEPENDS gflags
GIT_REPOSITORY "https://github.com/google/glog"
GIT_TAG "v0.4.0"
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${INSTALL_ROOT}
-DCMAKE_BUILD_TYPE=Release
-DBUILD_TESTING=OFF
)
# google benchmark
# ================
ExternalProject_Add(benchmark
DEPENDS
GIT_REPOSITORY https://github.com/google/benchmark.git
GIT_TAG "v1.5.0"
BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/Dependencies/Build/benchmark"
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${INSTALL_ROOT}
-DCMAKE_BUILD_TYPE=Release
-DBENCHMARK_ENABLE_TESTING=OFF
)
# google test
# ===========
ExternalProject_Add(googletest
DEPENDS glog gflags
GIT_REPOSITORY https://github.com/google/googletest.git
GIT_TAG "release-1.10.0"
BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/Dependencies/Build/googletest"
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${INSTALL_ROOT}
-DCMAKE_BUILD_TYPE=Release
)
# cpuaff
# ======
ExternalProject_Add(cpuaff
URL http://dcdillon.github.io/cpuaff/releases/cpuaff-1.0.6.tar.gz
CONFIGURE_COMMAND ./configure --prefix=${INSTALL_ROOT}
BUILD_COMMAND make include
INSTALL_COMMAND make install include
BUILD_IN_SOURCE 1
)
# nvidia cub
# ==========
ExternalProject_Add(cub
GIT_REPOSITORY https://github.com/NVlabs/cub.git
GIT_TAG "1.8.0"
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
# jemalloc
# ========
ExternalProject_Add(jemalloc
URL https://github.com/jemalloc/jemalloc/releases/download/5.2.1/jemalloc-5.2.1.tar.bz2
CONFIGURE_COMMAND ./configure --prefix=${INSTALL_ROOT}
BUILD_COMMAND make include
INSTALL_COMMAND make install include
BUILD_IN_SOURCE 1
)
# cnpy - c++ library for reading and writing .npy/.npz files
# ==========================================================
ExternalProject_Add(cnpy
GIT_REPOSITORY "https://github.com/rogersce/cnpy.git"
GIT_TAG "master"
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${INSTALL_ROOT}
-DCMAKE_BUILD_TYPE=Release
-DBUILD_TESTING=OFF
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
)
# grpc-repo
# =========
ExternalProject_Add(grpc-repo
GIT_REPOSITORY "https://github.com/grpc/grpc.git"
GIT_TAG "v1.32.0"
GIT_SUBMODULES "third_party/cares/cares" "third_party/protobuf" "third_party/abseil-cpp" "third_party/re2"
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
#
# Build protobuf project from grpc-repo
#
ExternalProject_Add(absl
SOURCE_DIR "${SOURCE_ROOT}/grpc-repo/third_party/abseil-cpp"
DOWNLOAD_COMMAND ""
CMAKE_CACHE_ARGS
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=TRUE
-DCMAKE_INSTALL_PREFIX:PATH=${INSTALL_ROOT}
DEPENDS grpc-repo
)
ExternalProject_Add(re2
SOURCE_DIR "${SOURCE_ROOT}/grpc-repo/third_party/re2"
DOWNLOAD_COMMAND ""
CMAKE_CACHE_ARGS
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=TRUE
-DCMAKE_INSTALL_PREFIX:PATH=${INSTALL_ROOT}
DEPENDS grpc-repo
)
ExternalProject_Add(protobuf
SOURCE_DIR "${SOURCE_ROOT}/grpc-repo/third_party/protobuf/cmake"
DOWNLOAD_COMMAND ""
CMAKE_ARGS
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-Dprotobuf_BUILD_TESTS:BOOL=OFF
-Dprotobuf_WITH_ZLIB:BOOL=OFF
-Dprotobuf_MSVC_STATIC_RUNTIME:BOOL=OFF
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_INSTALL_PREFIX:PATH=${INSTALL_ROOT}
DEPENDS grpc-repo
)
# Location where protobuf-config.cmake will be installed varies by
# platform
if (WIN32)
set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${CMAKE_CURRENT_BINARY_DIR}/protobuf/cmake")
else()
set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${INSTALL_ROOT}/lib/cmake")
endif()
#
# Build c-ares project from grpc-repo
#
ExternalProject_Add(c-ares
SOURCE_DIR "${SOURCE_ROOT}/grpc-repo/third_party/cares/cares"
DOWNLOAD_COMMAND ""
CMAKE_ARGS
-DCARES_SHARED:BOOL=OFF
-DCARES_STATIC:BOOL=ON
-DCARES_STATIC_PIC:BOOL=ON
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_INSTALL_PREFIX:PATH=${INSTALL_ROOT}
DEPENDS grpc-repo
)
#
# Build GRPC
#
ExternalProject_Add(grpc
SOURCE_DIR "${SOURCE_ROOT}/grpc-repo"
DOWNLOAD_COMMAND ""
CMAKE_ARGS
-DgRPC_INSTALL:BOOL=ON
-DgRPC_BUILD_TESTS:BOOL=OFF
-DgRPC_PROTOBUF_PROVIDER:STRING=package
-DgRPC_PROTOBUF_PACKAGE_TYPE:STRING=CONFIG
-DProtobuf_DIR:PATH=${INSTALL_ROOT}/lib/cmake
-DgRPC_ZLIB_PROVIDER:STRING=package
-DgRPC_CARES_PROVIDER:STRING=package
-Dc-ares_DIR:PATH=${INSTALL_ROOT}/lib/cmake
-DgRPC_SSL_PROVIDER:STRING=package
-DgRPC_GFLAGS_PROVIDER=package
-DgRPC_BENCHMARK_PROVIDER=package
-DgRPC_RE2_PROVIDER:STRING=package
-Dre2_DIR:STRING=${INSTALL_ROOT}/lib/cmake
-DgRPC_ABSL_PROVIDER:STRING=package
-Dabsl_DIR:STRING=${INSTALL_ROOT}/lib/cmake
${_CMAKE_ARGS_OPENSSL_ROOT_DIR}
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_INSTALL_PREFIX:PATH=${INSTALL_ROOT}
DEPENDS grpc-repo c-ares protobuf re2 absl gflags benchmark
)
# trtlab
# ======
ExternalProject_Add (trtlab
DEPENDS ${DEPENDENCIES}
SOURCE_DIR ${PROJECT_SOURCE_DIR}
CMAKE_ARGS -DBUILD_DEPENDENCIES=OFF ${EXTRA_CMAKE_ARGS}
INSTALL_COMMAND ""
BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
================================================
FILE: devel.sh
================================================
#!/bin/bash
#
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
models_path=${TRT_MODELS_PATH:-"/path/to/my/models"}
models_cli=""
if [ -d "$models_path" ]; then
models_cli=" -v $(realpath $models_path):/work/models "
fi
crt=""
if [ -x "$(which luda)" ] ; then
echo "Using luda"
crt="$(which luda) --no-home"
elif [ -x "$(which nvidia-docker)" ]; then
echo "Using nvidia-docker"
crt="nvidia-docker run --rm -ti"
else
echo "No GPU container runtime found"
exit 911
fi
NV_GPU=0 $crt -v $PWD:/work $models_cli --workdir /work --name trtlab --net host trtlab
================================================
FILE: examples/00_TensorRT/CMakeLists.txt
================================================
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
include_directories(${CUDA_INCLUDE_DIRS})
include_directories(${TensorRT_INCLUDE_DIRS})
add_executable(inference.x
inference.cc
${PROTO_SRCS}
${PROTO_GRPC_SRCS})
target_link_libraries(inference.x
trtlab::nvrpc
trtlab::tensorrt
gflags
)
add_executable(infer.x
infer.cc
${PROTO_SRCS}
${PROTO_GRPC_SRCS})
target_link_libraries(infer.x
trtlab::nvrpc
trtlab::tensorrt
gflags
)
if(YAIS_ENABLE_MPI)
find_package(MPI)
include_directories(SYSTEM ${MPI_INCLUDE_PATH})
target_link_libraries(inference.x
${MPI_C_LIBRARIES}
${MPI_CXX_LIBRARIES}
)
target_compile_definitions(inference.x PUBLIC PLAYGROUND_USE_MPI)
endif()
================================================
FILE: examples/00_TensorRT/README.md
================================================
# Inference Example
Basic CLI tool for executing TensorRT engines.
Provide an engine and `inference.x` will run a simplified inference pipeline using synthetic data.
The program runs a pipelined H2D -> TensorRT -> D2H calculation for `--seconds` (default: 5) with a
0.1 second warmup run. By default, only 1 TensorRT Execution Context is used to perform the evaluation.
You can modify the number of contexts using the `--contexts` flag. Unless provided, the number of Input/Output
Buffers is set to `(2 * contexts)`. See below for the list of [options](#options).
The `inference.x` program is fully pipelined and asynchronous. By default it uses three threads
to: 1) async copy the inputs H2D, 2) launch the async inference evaluation and return the output tensors to the host,
and 3) wait on the resources used during execution and release them when finished. This final thread is
where one might build a return message or do something else with the results; a simplified sketch of the loop follows.
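The sketch below condenses that loop from `inference.cc`; `resources` stands for the `InferenceResources`
object created in `main()`, and warmup, timing, and error handling are omitted. The pooled buffers and
execution contexts are the limited resources that throttle the pipeline when exhausted:
```
// condensed from the inner loop of inference.cc; "resources" is the
// InferenceResources object created in main()
auto buffers  = resources->GetBuffers();               // limited resource - may block
auto bindings = buffers->CreateBindings(model);
bindings->SetBatchSize(batch_size);
bindings->CopyToDevice(bindings->InputBindings());     // thread 1: async H2D copy

resources->GetCudaThreadPool()->enqueue([=]() mutable {
    // thread 2: enqueue TensorRT execution and the async D2H copy
    auto trt = resources->GetExecutionContext(bindings->GetModel()); // limited resource - may block
    trt->Infer(bindings);
    bindings->CopyFromDevice(bindings->OutputBindings());

    resources->GetResponseThreadPool()->enqueue([=]() mutable {
        // thread 3: wait for the async work, then release the pooled resources
        trt->Synchronize();      // TensorRT execution finished
        bindings->Synchronize(); // D2H copy finished - results are on the host
        trt.reset();
        bindings.reset();
    });
});
```
The actual implementation additionally tracks each iteration with a `std::promise`/`std::future` pair so the
main loop can wait for all in-flight work to drain before reporting results.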
While running `inference.x`, you may find it useful to monitor GPU metrics using:
```
nvidia-smi dmon -i 0 -s put
```
Note: If you see numbers that differ from the output of `giexec`, you may have an IO bottleneck in that
the transfers are more expensive than the compute.
* TODO: Update the program to output avg xfer time.
* TODO: Build .engine files as part of the build
## Quickstart
```
root@dgx:/work/build/examples/00_TensorRT# ./inference.x --engine=/work/models/ResNet-50-b1-int8.engine
I0702 22:16:51.868419 10857 TensorRT.cc:561] -- Initialzing TensorRT Resource Manager --
I0702 22:16:51.868676 10857 TensorRT.cc:562] Maximum Execution Concurrency: 1
I0702 22:16:51.868686 10857 TensorRT.cc:563] Maximum Copy Concurrency: 2
I0702 22:16:53.430330 10857 TensorRT.cc:628] -- Registering Model: 0 --
I0702 22:16:53.430399 10857 TensorRT.cc:629] Input/Output Tensors require 591.9 KiB
I0702 22:16:53.430415 10857 TensorRT.cc:630] Execution Activations require 2.5 MiB
I0702 22:16:53.430428 10857 TensorRT.cc:633] Weights require 30.7 MiB
I0702 22:16:53.437571 10857 TensorRT.cc:652] -- Allocating TensorRT Resources --
I0702 22:16:53.437587 10857 TensorRT.cc:653] Creating 1 TensorRT execution tokens.
I0702 22:16:53.437595 10857 TensorRT.cc:654] Creating a Pool of 2 Host/Device Memory Stacks
I0702 22:16:53.437607 10857 TensorRT.cc:655] Each Host Stack contains 608.0 KiB
I0702 22:16:53.437614 10857 TensorRT.cc:656] Each Device Stack contains 3.2 MiB
I0702 22:16:53.437623 10857 TensorRT.cc:657] Total GPU Memory: 6.5 MiB
I0702 22:16:53.540400 10857 inference.cc:93] -- Inference: Running for ~5 seconds with batch_size 1 --
I0702 22:16:58.543475 10857 inference.cc:131] Inference Results: 4770 batches in 5.00307 seconds; sec/batch: 0.00104886; inf/sec: 953.414
```
## Options
```
-buffers (Number of Buffers (default: 2x contexts)) type: int32 default: 0
-contexts (Number of Execution Contexts) type: int32 default: 1
-cudathreads (Number Cuda Launcher Threads) type: int32 default: 1
-engine (TensorRT serialized engine) type: string
default: "/work/models/trt4.engine"
-respthreads (Number Response Sync Threads) type: int32 default: 1
-seconds (Approximate number of seconds for the timing loop) type: int32 default: 5
```
================================================
FILE: examples/00_TensorRT/infer.cc
================================================
/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/stat.h>
#include <unistd.h>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include "tensorrt/laboratory/core/thread_pool.h"
#include "tensorrt/laboratory/infer_bench.h"
#include "tensorrt/laboratory/inference_manager.h"
#include "tensorrt/laboratory/model.h"
#include "tensorrt/laboratory/runtime.h"
#ifdef PLAYGROUND_USE_MPI
#include "mpi.h"
#define MPI_CHECK(mpicall) mpicall
#else
#define MPI_CHECK(mpicall)
#endif
using trtlab::ThreadPool;
using trtlab::TensorRT::InferBench;
using trtlab::TensorRT::InferBenchKey;
using trtlab::TensorRT::InferenceManager;
using trtlab::TensorRT::ManagedRuntime;
using trtlab::TensorRT::Model;
using trtlab::TensorRT::Runtime;
using trtlab::TensorRT::StandardRuntime;
static std::string ModelName(int model_id)
{
std::ostringstream stream;
stream << model_id;
return stream.str();
}
static bool ValidateEngine(const char* flagname, const std::string& value)
{
struct stat buffer;
return (stat(value.c_str(), &buffer) == 0);
}
DEFINE_string(engine, "/path/to/tensorrt.engine", "TensorRT serialized engine");
DEFINE_validator(engine, &ValidateEngine);
DEFINE_string(runtime, "default", "TensorRT Runtime");
DEFINE_int32(seconds, 5, "Approximate number of seconds for the timing loop");
DEFINE_int32(contexts, 1, "Number of Execution Contexts");
DEFINE_int32(buffers, 0, "Number of Buffers (default: 2x contexts)");
DEFINE_int32(cudathreads, 1, "Number Cuda Launcher Threads");
DEFINE_int32(respthreads, 1, "Number Response Sync Threads");
DEFINE_int32(replicas, 1, "Number of Replicas of the Model to load");
DEFINE_int32(batch_size, 0, "Overrides the max batch_size of the provided engine");
int main(int argc, char* argv[])
{
FLAGS_alsologtostderr = 1; // Log to console
::google::InitGoogleLogging("TensorRT Inference");
::google::ParseCommandLineFlags(&argc, &argv, true);
MPI_CHECK(MPI_Init(&argc, &argv));
auto contexts = FLAGS_contexts;
auto buffers = FLAGS_buffers ? FLAGS_buffers : 2 * FLAGS_contexts;
auto resources = std::make_shared<InferenceManager>(contexts, buffers);
resources->RegisterThreadPool("pre", std::make_unique<ThreadPool>(1));
resources->RegisterThreadPool("cuda", std::make_unique<ThreadPool>(1));
resources->RegisterThreadPool("post", std::make_unique<ThreadPool>(3));
//, FLAGS_cudathreads, FLAGS_respthreads);
std::shared_ptr<Runtime> runtime;
if(FLAGS_runtime == "default")
{
runtime = std::make_shared<StandardRuntime>();
}
else if(FLAGS_runtime == "unified")
{
runtime = std::make_shared<ManagedRuntime>();
}
else
{
LOG(FATAL) << "Invalid TensorRT Runtime";
}
std::vector<std::shared_ptr<Model>> models;
models.push_back(runtime->DeserializeEngine(FLAGS_engine));
resources->RegisterModel("0", models.back());
resources->AllocateResources();
auto batch_size = FLAGS_batch_size ? FLAGS_batch_size : models.back()->GetMaxBatchSize();
for(int i = 1; i < FLAGS_replicas; i++)
{
models.push_back(runtime->DeserializeEngine(FLAGS_engine));
resources->RegisterModel(ModelName(i), models.back());
}
{
InferBench benchmark(resources);
benchmark.Run(models, batch_size, 0.1);
// if testing mps - sync all processes before executing timed loop
MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));
auto results = benchmark.Run(models, batch_size, FLAGS_seconds);
MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));
// todo: perform an mpi_allreduce to collect the per process timings
// for a simplified report
MPI_CHECK(MPI_Finalize());
using namespace trtlab::TensorRT;
LOG(INFO) << "Inference Results: " << (*results)[kBatchesComputed]
<< " batches computed in " << (*results)[kWalltime] << " seconds on "
<< (*results)[kMaxExecConcurrency]
<< " compute streams using batch_size: " << (*results)[kBatchSize]
<< "; inf/sec: " << (*results)[kInferencesPerSecond]
<< "; batches/sec: " << (*results)[kBatchesPerSecond]
<< "; execution time per batch: " << (*results)[kExecutionTimePerBatch];
}
return 0;
}
================================================
FILE: examples/00_TensorRT/inference.cc
================================================
/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/stat.h>
#include <unistd.h>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include "tensorrt/laboratory/core/thread_pool.h"
#include "tensorrt/laboratory/inference_manager.h"
#include "tensorrt/laboratory/runtime.h"
#ifdef PLAYGROUND_USE_MPI
#include "mpi.h"
#define MPI_CHECK(mpicall) mpicall
#else
#define MPI_CHECK(mpicall)
#endif
using trtlab::ThreadPool;
using trtlab::TensorRT::CustomRuntime;
using trtlab::TensorRT::InferenceManager;
using trtlab::TensorRT::ManagedAllocator;
using trtlab::TensorRT::Runtime;
using trtlab::TensorRT::StandardAllocator;
static int g_Concurrency = 0;
static std::string ModelName(int model_id)
{
std::ostringstream stream;
stream << model_id;
return stream.str();
}
class InferenceResources : public InferenceManager
{
public:
InferenceResources(int max_executions, int max_buffers, size_t nCuda, size_t nResp)
: InferenceManager(max_executions, max_buffers),
m_CudaThreadPool(std::make_unique<ThreadPool>(nCuda)),
m_ResponseThreadPool(std::make_unique<ThreadPool>(nResp))
{
}
~InferenceResources() override {}
std::unique_ptr<ThreadPool>& GetCudaThreadPool() { return m_CudaThreadPool; }
std::unique_ptr<ThreadPool>& GetResponseThreadPool() { return m_ResponseThreadPool; }
private:
std::unique_ptr<ThreadPool> m_CudaThreadPool;
std::unique_ptr<ThreadPool> m_ResponseThreadPool;
};
class Inference final
{
public:
Inference(std::shared_ptr<InferenceResources> resources) : m_Resources(resources) {}
void Run(float seconds, bool warmup, int replicas, uint32_t requested_batch_size)
{
int replica = 0;
uint64_t inf_count = 0;
auto start = std::chrono::steady_clock::now();
auto elapsed = [start]() -> float {
return std::chrono::duration<float>(std::chrono::steady_clock::now() - start).count();
};
auto model = GetResources()->GetModel(ModelName(replica++));
auto batch_size = requested_batch_size ? requested_batch_size : model->GetMaxBatchSize();
if(batch_size > model->GetMaxBatchSize())
{
LOG(FATAL)
<< "Requested batch_size greater than allowed by the compiled TensorRT Engine";
}
// Inference Loop - Main thread copies, cuda thread launches, response thread completes
if(!warmup)
{
LOG(INFO) << "-- Inference: Running for ~" << (int)seconds
<< " seconds with batch_size " << batch_size << " --";
}
std::vector<std::future<void>> futures;
while(elapsed() < seconds && ++inf_count)
{
if(replica >= replicas) replica = 0;
// This thread only async copies buffers H2D
auto model = GetResources()->GetModel(ModelName(replica++));
auto buffers = GetResources()->GetBuffers(); // <=== Limited Resource; May Block !!!
auto bindings = buffers->CreateBindings(model);
auto promise = std::make_shared<std::promise<void>>();
futures.push_back(promise->get_future());
bindings->SetBatchSize(batch_size);
bindings->CopyToDevice(bindings->InputBindings());
GetResources()->GetCudaThreadPool()->enqueue([this, bindings, promise]() mutable {
// This thread enqueues two async kernels:
// 1) TensorRT execution
// 2) D2H of output tensors
auto trt = GetResources()->GetExecutionContext(
bindings->GetModel()); // <=== Limited Resource; May Block !!!
trt->Infer(bindings);
bindings->CopyFromDevice(bindings->OutputBindings());
GetResources()->GetResponseThreadPool()->enqueue(
[bindings, trt, promise]() mutable {
// This thread waits on the completion of the async compute and the async
// copy
trt->Synchronize();
trt.reset(); // Finished with the Execution Context - Release it to
// competing threads
bindings->Synchronize();
bindings.reset(); // Finished with Buffers - Release it to competing threads
promise->set_value();
});
});
}
for(const auto& f : futures)
{
f.wait();
}
/*
// Join worker threads
if (!warmup)
GetResources()->GetCudaThreadPool().reset();
if (!warmup)
GetResources()->GetResponseThreadPool().reset();
*/
// End timing and report
auto total_time = std::chrono::duration<float>(elapsed()).count();
auto inferences = inf_count * batch_size;
if(!warmup)
LOG(INFO) << "Inference Results: " << inf_count << "; batches in " << total_time
<< " seconds"
<< "; sec/batch/stream: " << total_time / (inf_count / g_Concurrency)
<< "; batches/sec: " << inf_count / total_time
<< "; inf/sec: " << inferences / total_time;
}
protected:
inline std::shared_ptr<InferenceResources> GetResources() { return m_Resources; }
private:
std::shared_ptr<InferenceResources> m_Resources;
};
static bool ValidateEngine(const char* flagname, const std::string& value)
{
struct stat buffer;
return (stat(value.c_str(), &buffer) == 0);
}
DEFINE_string(engine, "/path/to/tensorrt.engine", "TensorRT serialized engine");
DEFINE_validator(engine, &ValidateEngine);
DEFINE_string(runtime, "default", "TensorRT Runtime");
DEFINE_int32(seconds, 5, "Approximate number of seconds for the timing loop");
DEFINE_int32(contexts, 1, "Number of Execution Contexts");
DEFINE_int32(buffers, 0, "Number of Buffers (default: 2x contexts)");
DEFINE_int32(cudathreads, 1, "Number Cuda Launcher Threads");
DEFINE_int32(respthreads, 1, "Number Response Sync Threads");
DEFINE_int32(replicas, 1, "Number of Replicas of the Model to load");
DEFINE_int32(batch_size, 0, "Overrides the max batch_size of the provided engine");
int main(int argc, char* argv[])
{
FLAGS_alsologtostderr = 1; // Log to console
::google::InitGoogleLogging("TensorRT Inference");
::google::ParseCommandLineFlags(&argc, &argv, true);
MPI_CHECK(MPI_Init(&argc, &argv));
auto contexts = g_Concurrency = FLAGS_contexts;
auto buffers = FLAGS_buffers ? FLAGS_buffers : 2 * FLAGS_contexts;
auto resources = std::make_shared<InferenceResources>(contexts, buffers, FLAGS_cudathreads,
FLAGS_respthreads);
std::shared_ptr<Runtime> runtime;
if(FLAGS_runtime == "default")
{
runtime = std::make_shared<CustomRuntime<StandardAllocator>>();
}
else if(FLAGS_runtime == "unified")
{
runtime = std::make_shared<CustomRuntime<ManagedAllocator>>();
}
else
{
LOG(FATAL) << "Invalid TensorRT Runtime";
}
resources->RegisterModel("0", runtime->DeserializeEngine(FLAGS_engine));
resources->AllocateResources();
for(int i = 1; i < FLAGS_replicas; i++)
{
resources->RegisterModel(ModelName(i), runtime->DeserializeEngine(FLAGS_engine));
}
Inference inference(resources);
inference.Run(0.1, true, 1, 0); // warmup
// if testing mps - sync all processes before executing timed loop
MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));
inference.Run(FLAGS_seconds, false, FLAGS_replicas, FLAGS_batch_size);
MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));
// todo: perform an mpi_allreduce to collect the per process timings
// for a simplified report
MPI_CHECK(MPI_Finalize());
return 0;
}
================================================
FILE: examples/01_Basic_GRPC/CMakeLists.txt
================================================
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add_executable(echo-grpc.x
src/server.cpp)
target_link_libraries(echo-grpc.x
nvrpc
echo-protos
gflags
)
add_executable(echo-client.x
src/client.cpp)
target_link_libraries(echo-client.x
nvrpc
echo-protos
gflags
)
add_executable(async-echo-client.x
src/async_client.cc)
target_link_libraries(async-echo-client.x
nvrpc
nvrpc-client
echo-protos
gflags
)
================================================
FILE: examples/01_Basic_GRPC/README.md
================================================
Simple service to test and stress the core service and request logic.
The [`server.cpp`](examples/01_Basic_GRPC/src/server.cpp) source is very well documented and
should be used as a reference for the gRPC interface provided by the library.
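For a quick orientation before reading the sources, the synchronous client in
[`src/client.cpp`](examples/01_Basic_GRPC/src/client.cpp) boils down to a single blocking unary call against
the `simple.Inference` service; a minimal sketch:
```
// condensed from src/client.cpp - a single blocking unary call
auto channel = grpc::CreateChannel("localhost:50051", grpc::InsecureChannelCredentials());
auto stub    = simple::Inference::NewStub(channel);

simple::Input request;
request.set_batch_id(42);
simple::Output reply;
grpc::ClientContext context;

// the server simply echoes the batch_id back in the reply
grpc::Status status = stub->Compute(&context, request, &reply);
```
The asynchronous variant in `src/async_client.cc` issues the same RPC through `nvrpc::client::ClientUnary`,
allowing many requests to be in flight at once.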
================================================
FILE: examples/01_Basic_GRPC/src/async_client.cc
================================================
/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <chrono>
#include <iostream>
#include <memory>
#include <string>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <grpcpp/grpcpp.h>
#include "nvrpc/client/client_unary.h"
#include "nvrpc/client/executor.h"
#include "echo.grpc.pb.h"
using grpc::Channel;
using grpc::ClientContext;
using grpc::Status;
using simple::Inference;
using simple::Input;
using simple::Output;
using nvrpc::client::ClientUnary;
using nvrpc::client::Executor;
DEFINE_int32(count, 100, "number of grpc messages to send");
DEFINE_int32(thread_count, 1, "Size of thread pool");
int main(int argc, char** argv)
{
// Instantiate the client. It requires a channel, out of which the actual RPCs
// are created. This channel models a connection to an endpoint (in this case,
// localhost at port 50051). We indicate that the channel isn't authenticated
// (use of InsecureChannelCredentials()).
FLAGS_alsologtostderr = 1; // It will dump to console
::google::ParseCommandLineFlags(&argc, &argv, true);
auto executor = std::make_shared<Executor>(FLAGS_thread_count);
auto channel = grpc::CreateChannel("localhost:50051", grpc::InsecureChannelCredentials());
auto stub = Inference::NewStub(channel);
auto infer_prepare_fn = [&stub](::grpc::ClientContext * context, const ::simple::Input& request,
::grpc::CompletionQueue* cq) -> auto
{
return std::move(stub->PrepareAsyncCompute(context, request, cq));
};
auto runner = std::make_unique<ClientUnary<Input, Output>>(infer_prepare_fn, executor);
auto start = std::chrono::steady_clock::now();
auto elapsed = [start]() -> float {
return std::chrono::duration<float>(std::chrono::steady_clock::now() - start).count();
};
for(int i = 0; i < FLAGS_count; i++)
{
Input input;
input.set_batch_id(i);
runner->Enqueue(std::move(input),
[i](Input& input, Output& output, ::grpc::Status& status) -> bool {
CHECK(output.batch_id() == i);
LOG_FIRST_N(INFO, 20) << "Check: " << i;
return (bool)(output.batch_id() == i);
});
}
std::cout << FLAGS_count << " queued in " << elapsed() << "seconds" << std::endl;
executor->ShutdownAndJoin();
std::cout << FLAGS_count << " completed in " << elapsed() << "seconds" << std::endl;
return 0;
}
================================================
FILE: examples/01_Basic_GRPC/src/client.cpp
================================================
/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <iostream>
#include <memory>
#include <string>
#include <chrono>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <grpcpp/grpcpp.h>
#include "echo.grpc.pb.h"
using grpc::Channel;
using grpc::ClientContext;
using grpc::Status;
using simple::Input;
using simple::Output;
using simple::Inference;
class SimpleClient {
public:
SimpleClient(std::shared_ptr<Channel> channel)
: stub_(Inference::NewStub(channel)) {}
// Assembles the client's payload, sends it and presents the response back
// from the server.
int Compute(const int batch_id) {
// Data we are sending to the server.
Input request;
request.set_batch_id(batch_id);
// Container for the data we expect from the server.
Output reply;
// Context for the client. It could be used to convey extra information to
// the server and/or tweak certain RPC behaviors.
ClientContext context;
// The actual RPC.
Status status = stub_->Compute(&context, request, &reply);
// Act upon its status.
if (status.ok()) {
return reply.batch_id();
} else {
std::cout << status.error_code() << ": " << status.error_message()
<< std::endl;
return -1;
}
}
private:
std::unique_ptr<Inference::Stub> stub_;
};
DEFINE_int32(count, 100, "number of grpc messages to send");
int main(int argc, char** argv) {
// Instantiate the client. It requires a channel, out of which the actual RPCs
// are created. This channel models a connection to an endpoint (in this case,
// localhost at port 50051). We indicate that the channel isn't authenticated
// (use of InsecureChannelCredentials()).
FLAGS_alsologtostderr = 1; // It will dump to console
::google::ParseCommandLineFlags(&argc, &argv, true);
SimpleClient client(grpc::CreateChannel(
"localhost:50051", grpc::InsecureChannelCredentials()));
auto start = std::chrono::steady_clock::now();
for(int i=0; i<FLAGS_count; i++) {
auto reply = client.Compute(i);
if(reply == -1 || reply != i) std::cout << "BatchId received: " << reply << std::endl;
}
auto end = std::chrono::steady_clock::now();
float elapsed = std::chrono::duration<float>(end - start).count();
std::cout << FLAGS_count << " requests in " << elapsed << "seconds" << std::endl;
return 0;
}
================================================
FILE: examples/01_Basic_GRPC/src/server.cpp
================================================
/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <chrono>
#include <thread>
#include "nvrpc/server.h"
#include "nvrpc/service.h"
#include "nvrpc/executor.h"
#include "tensorrt/laboratory/core/pool.h"
#include "tensorrt/laboratory/core/resources.h"
#include "tensorrt/laboratory/core/thread_pool.h"
#include "echo.pb.h"
#include "echo.grpc.pb.h"
using nvrpc::AsyncService;
using nvrpc::AsyncRPC;
using nvrpc::Context;
using nvrpc::Executor;
using nvrpc::Server;
using trtlab::Resources;
using trtlab::ThreadPool;
// CLI Options
DEFINE_int32(thread_count, 1, "Size of thread pool");
/**
* Embedding a copy of the Protobuf specification for the gRPC service.
*
* Package Name: simple
* Service Name: Inference
* RPC Name: Compute
*
* Incoming Message: Input
* Outgoing Message: Output
**
syntax = "proto3";
package simple;
service Inference {
rpc Compute (Input) returns (Output) {}
}
message Input {
uint64 batch_id = 1;
}
message Output {
uint64 batch_id = 1;
}
*/
// Define the resources your RPC will need to execute
// ==================================================
// In this case, all simple::Inference::Compute RPCs share a threadpool on which they
// queue up their work. This essentially means that after the message has been received and
// processed, the actual work for the RPC is pushed to a worker pool outside the scope of
// the transaction processing system (TPS). This is essentially async computing: we have
// decoupled the transaction from the workers executing the implementation. The TPS can
// continue to queue work while the workers process the load.
struct SimpleResources : public Resources
{
SimpleResources(int numThreadsInPool=3) : m_ThreadPool(numThreadsInPool) {
LOG(INFO) << "Server ThreadCount: " << numThreadsInPool;
}
ThreadPool& AcquireThreadPool()
{
return m_ThreadPool;
}
private:
ThreadPool m_ThreadPool;
};
// Contexts hold the state and provide the definition of the work to be performed by the RPC.
// This is where you define what gets executed for a given RPC.
// Incoming Message = simple::Input (RequestType)
// Outgoing Message = simple::Output (ResponseType)
class SimpleContext final : public Context<simple::Input, simple::Output, SimpleResources>
{
void ExecuteRPC(RequestType &input, ResponseType &output) final override
{
// We could do work here, but we'd block the TPS, i.e. the threads pulling messages
// off the incoming receive queue. Very quick responses are best done here; however,
// longer running workloads should be offloaded so the TPS can avoid being blocked.
GetResources()->AcquireThreadPool().enqueue([this, &input, &output]{
// Now running on a worker thread of the ThreadPool defined in SimpleResources.
// Here we are just echoing back the incoming batch_id; however, in later
// examples, we'll show how to run an async CUDA pipeline.
LOG_FIRST_N(INFO, 20) << "Tag = " << Tag() << " Thread = " << std::this_thread::get_id();
output.set_batch_id(input.batch_id());
this->FinishResponse();
});
// The TPS thread is now free to continue processing messages - async ftw!
}
};
int main(int argc, char *argv[])
{
FLAGS_alsologtostderr = 1; // Log to console
::google::InitGoogleLogging("simpleServer");
::google::ParseCommandLineFlags(&argc, &argv, true);
// A server will bind an IP:PORT to listen on
Server server("0.0.0.0:50051");
// A server can host multiple services
LOG(INFO) << "Register Service (simple::Inference) with Server";
auto simpleInference = server.RegisterAsyncService<simple::Inference>();
// An RPC has two components that need to be specified when registering with the service:
// 1) Type of Execution Context (SimpleContext). The execution context defines the behavior
// of the RPC, i.e. it contains the control logic for the execution of the RPC.
// 2) The Request function (RequestCompute) which was generated by gRPC when compiling the
// protobuf which defined the service. This function is responsible for queuing the
//    RPC's execution context with the gRPC completion queue.
LOG(INFO) << "Register RPC (simple::Inference::Compute) with Service (simple::Inference)";
auto rpcCompute = simpleInference->RegisterRPC<SimpleContext>(
&simple::Inference::AsyncService::RequestCompute
);
LOG(INFO) << "Initializing Resources for RPC (simple::Inference::Compute)";
auto rpcResources = std::make_shared<SimpleResources>(FLAGS_thread_count);
// Create Executors - Executors provide the messaging processing resources for the RPCs
// Multiple Executors can be registered with a Server. The executor is responsible
// for pulling incoming messages off the receive queue and executing the associated
// context. By default, an executor only uses a single thread. A typical use case is
// an Executor executing a context, which immediately pushes the work to a thread pool.
// However, for very low-latency messaging, you might want to use a multi-threaded
// Executor and a Blocking Context - meaning the Context performs the entire RPC function
// on the Executor's thread.
LOG(INFO) << "Creating Executor";
auto executor = server.RegisterExecutor(new Executor(1));
// You can register RPC execution contexts from any registered RPC on any executor.
// The power of that will become clear in later examples. For now, we will register
// 10 instances of the simple::Inference::Compute RPC's SimpleContext execution context
// with the Executor.
LOG(INFO) << "Creating Execution Contexts for RPC (simple::Inference::Compute) with Executor";
executor->RegisterContexts(rpcCompute, rpcResources, 10);
LOG(INFO) << "Running Server";
server.Run(std::chrono::milliseconds(2000), []{
// This is a timeout loop executed every 2 seconds
// Run() with no arguments will run an empty timeout loop every 5 seconds.
// RunAsync() will return immediately; it's your responsibility to ensure the
// server doesn't go out of scope or a Shutdown will be triggered on your services.
});
}
================================================
FILE: examples/02_TensorRT_GRPC/CMakeLists.txt
================================================
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
find_package(prometheus-cpp CONFIG REQUIRED)
if(prometheus-cpp_FOUND)
message(STATUS "Prometheus Metrics Enabled")
endif(prometheus-cpp_FOUND)
add_executable(inference-grpc.x
src/metrics.cc
src/server.cc
)
target_include_directories(inference-grpc.x
PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
)
target_link_libraries(inference-grpc.x
trtlab::nvrpc
trtlab::tensorrt
prometheus-cpp::prometheus-cpp
demo-protos
gflags
)
add_executable(client-sync.x
src/sync-client.cc
)
target_link_libraries(client-sync.x
nvrpc
demo-protos
gflags
)
add_executable(client-async.x
src/async-client.cc
)
target_link_libraries(client-async.x
nvrpc
demo-protos
gflags
)
add_executable(siege.x
src/siege.cc
)
target_link_libraries(siege.x
nvrpc
demo-protos
gflags
)
================================================
FILE: examples/02_TensorRT_GRPC/README.md
================================================
# TensorRT GRPC Example
This example extends the [TensorRT](examples/00_TensorRT) compute loop into an
async gRPC service similar to [example 01_gRPC](examples/01_GRPC).
There are three take-aways from this example:
1. The TensorRT compute pipeline is implemented as the `ExecuteRPC` virtual function
of the `Context`.
2. An external datasource is used to override the input bindings.
3. Custom [Prometheus](https://prometheus.io) metrics for inference compute and
request durations, load ratio, and GPU power are recorded/observed.
## Quickstart
```
cd /work/build/examples/02_TensorRT_GRPC
./inference-grpc.x --contexts=8 --engine=/work/models/ResNet-50-b1-int8.engine --port 50051 &
./siege.x --port=50051 --rate=2500
# ctrl+c to cancel client
telegraf -test -config /work/examples/91_Prometheus/scrape.conf
```
## Explore
Fun things to try:
* Evaluate the performance of the model using `inference.x` in
[examples/00_TensorRT](examples/00_TensorRT)
* Try running `siege.x` below, at, and above the benchmarked rate and watch the metrics
via `telegraf`.
* Deploy on Kubernetes, collect metrics via Prometheus and visualize using Grafana;
[examples/90_Kubernetes](examples/90_Kubernetes).
## Server/Service
`inference-grpc.x` CLI options:
* `--engine` - the compiled TensorRT plan/engine
* `--contexts` - the maximum number of concurrent evaluations of the engine.
* `--port` - the port on which requests are received (default: 50051)
* `--metrics` - the port on which to expose metrics to be scraped (default: 50078)
## Clients
Three clients are available:
* `client-sync.x` - sends a blocking inference request to the service and waits for the
response. Only 1 request is ever in-flight at a given time.
* `client-async.x` - the async client is capable of issuing multiple in-flight requests.
Note: the load-balancer is limited to 1000 outstanding requests per client before
circuit-breaking. Running more than 1000 requests will trigger 503s if targeting the
envoy load-balancer. The client has no backoff and will try to send the full complement
of requested inference requests. `siege.x` is the better async client.
* `siege.x` - constant rate (`--rate`) async engine that is hard-coded to have no more than
950 outstanding in-flight requests. A warning is given client-side if the outstanding
request count tops out, meaning the rate is limited by the server-side compute; illustrative
invocations are shown below.
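For example, the siege rate can be shaped with the `--func`, `--alpha`, and `--beta`
flags defined in [`src/siege.cc`](src/siege.cc); the invocations below are purely illustrative:
```
# constant 2500 requests/sec (as in the Quickstart)
./siege.x --port=50051 --rate=2500

# rate oscillating around 2000 requests/sec with an amplitude of 500, one cycle per minute
./siege.x --port=50051 --rate=2000 --func=cyclic --alpha=500 --beta=1
```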
TODO:
* Add more varied test clients akin to [Netflix's Chaos Monkeys](https://github.com/Netflix/chaosmonkey),
but for gRPC client behavior.
* Random rate, random pulses, canceled messages, messages with unreasonable timeouts, etc.
## Metrics
YAIS metrics are gathered and exposed via the [prometheus-cpp](https://github.com/jupp0r/prometheus-cpp)
client library. In this example, we expose four custom
[metrics](https://prometheus.io/docs/concepts/metric_types/): 2 Summaries, 1 Histogram and 1 Gauge.
* `compute_duration` and `request_duration` are summaries recorded with the model
name as a component of the metric. This is useful for evaluating how a given
model is performing, but this is not a good metric to aggregate across multiple
services.
* `load_ratio` is a histogram of `request_duration / compute_duration`. Ideally, this
unitless value is just over 1.0. Values higher than 1.0 are indicative of some
delay in the compute of a given request. Sources of delay include overloaded
queues and/or starvation of resources. Histograms can be aggregated across services,
which makes this metric a good candidate for triggering auto-scaling.
* `gpu_power` is a simple gauge that periodically reports the instantaneous power being
consumed by the device. As the load increases on the service, the power should
increase proportionally, until the power is capped either by device limits or compute
resources. Once power-capped, the `load_ratio` will begin to increase under further
increases in traffic. A registration sketch for these metrics follows below.
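The snippet below is a minimal sketch of how these metrics are registered and observed with
prometheus-cpp; the metric names, labels, and quantiles mirror those used in
[`src/server.cc`](src/server.cc), and the `Metrics` singleton comes from
[`src/metrics.h`](src/metrics.h).
```
#include <vector>
#include <prometheus/histogram.h>
#include <prometheus/registry.h>
#include <prometheus/summary.h>
#include "metrics.h" // trtlab::Metrics singleton used by this example

static auto& registry = trtlab::Metrics::GetRegistry();

// Summary family: per-model compute duration in milliseconds
static auto& compute_family =
    prometheus::BuildSummary().Name("yais_inference_compute_duration_ms").Register(registry);
static const auto quantiles =
    prometheus::Summary::Quantiles{{0.5, 0.05}, {0.90, 0.01}, {0.99, 0.001}};

// Histogram: unitless load ratio (request_duration / compute_duration)
static auto& load_ratio_family =
    prometheus::BuildHistogram().Name("yais_inference_load_ratio").Register(registry);
static auto& load_ratio =
    load_ratio_family.Add({}, std::vector<double>{1.25, 1.5, 2.0, 10.0, 100.0});

// Observed once per completed request; times are in seconds here
void RecordRequest(double compute_time, double request_time)
{
    compute_family.Add({{"model", "flowers"}}, quantiles).Observe(compute_time * 1000);
    load_ratio.Observe(request_time / compute_time);
}
```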
### Acquiring Metrics
Prometheus metrics are generally scraped by a Prometheus service. When using Kubernetes
to deploy services, the [prometheus-operator](https://github.com/coreos/prometheus-operator)
provides a [`ServiceMonitor`](https://github.com/coreos/prometheus-operator#customresourcedefinitions)
which allows you to define custom scraping configuration per service. See the
[Kubernetes example](examples/90_Kubernetes) for more details.
While testing, you can use the [`telegraf`](https://github.com/influxdata/telegraf) application
to scrape local services.
```
# start service
telegraf -test -config /work/examples/91_Prometheus/scrape.conf
```
Here is some sample output (line breaks added for readability):
```
> yais_inference_compute_duration_ms,host=dgx,model=flowers,url=http://localhost:50078/metrics count=1000,sum=2554.070996 1530985302000000000
> yais_inference_compute_duration_ms_quantile,host=dgx,model=flowers,quantile=0.500000,url=http://localhost:50078/metrics value=2.526903 1530985302000000000
> yais_inference_compute_duration_ms_quantile,host=dgx,model=flowers,quantile=0.900000,url=http://localhost:50078/metrics value=2.625447 1530985302000000000
> yais_inference_compute_duration_ms_quantile,host=dgx,model=flowers,quantile=0.990000,url=http://localhost:50078/metrics value=2.855728 1530985302000000000
> yais_inference_request_duration_ms,host=dgx,model=flowers,url=http://localhost:50078/metrics count=1000,sum=243547.558097 1530985302000000000
> yais_inference_request_duration_ms_quantile,host=dgx,model=flowers,quantile=0.500000,url=http://localhost:50078/metrics value=253.216653 1530985302000000000
> yais_inference_request_duration_ms_quantile,host=dgx,model=flowers,quantile=0.900000,url=http://localhost:50078/metrics value=256.715759 1530985302000000000
> yais_inference_request_duration_ms_quantile,host=dgx,model=flowers,quantile=0.990000,url=http://localhost:50078/metrics value=275.407232 1530985302000000000
> yais_inference_load_ratio,host=dgx,url=http://localhost:50078/metrics +Inf=1000,1.25=1,1.5=1,10=9,100=253,2=1,count=1000,sum=95879.013208 1530985302000000000
> yais_gpus_power_usage,gpu=0,host=dgx,url=http://localhost:50078/metrics gauge=52.821 1530985302000000000
> yais_executor_queue_depth,host=dgx,url=http://localhost:50078/metrics gauge=0 1530985302000000000
```
### Best Practices
For a good description of using histograms vs. summaries to collect meaningful metrics
see: https://prometheus.io/docs/practices/histograms/
Two rules of thumb:
- If you need to aggregate, choose histograms.
- Otherwise, choose a histogram if you have an idea of the range and distribution of
values that will be observed. Choose a summary if you need an accurate quantile, no
matter what the range and distribution of the values is.
================================================
FILE: examples/02_TensorRT_GRPC/src/async-client.cc
================================================
/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Original Copyright provided below.
* This work extends the original gRPC client examples to work with the
* implemented server.
*
* Copyright 2015 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
#include <chrono>
#include <iostream>
#include <memory>
#include <string>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <grpc/support/log.h>
#include <grpcpp/grpcpp.h>
#include <thread>
#include "inference.grpc.pb.h"
using grpc::Channel;
using grpc::ClientAsyncResponseReader;
using grpc::ClientContext;
using grpc::CompletionQueue;
using grpc::Status;
using ssd::BatchInput;
using ssd::BatchPredictions;
using ssd::Inference;
class GreeterClient
{
public:
explicit GreeterClient(std::shared_ptr<Channel> channel) : stub_(Inference::NewStub(channel)) {}
// Assembles the client's payload and sends it to the server.
void SayHello(const size_t batch_id, const int batch_size)
{
// Data we are sending to the server.
BatchInput request;
request.set_batch_id(batch_id);
request.set_batch_size(batch_size);
// Call object to store rpc data
AsyncClientCall* call = new AsyncClientCall;
// stub_->PrepareAsyncCompute() creates an RPC object, returning
// an instance to store in "call" but does not actually start the RPC
// Because we are using the asynchronous API, we need to hold on to
// the "call" instance in order to get updates on the ongoing RPC.
call->response_reader = stub_->PrepareAsyncCompute(&call->context, request, &cq_);
// StartCall initiates the RPC call
call->response_reader->StartCall();
// Request that, upon completion of the RPC, "reply" be updated with the
// server's response; "status" with the indication of whether the operation
// was successful. Tag the request with the memory address of the call object.
call->response_reader->Finish(&call->reply, &call->status, (void*)call);
}
// Loop while listening for completed responses.
// Prints out the response from the server.
void AsyncCompleteRpc()
{
void* got_tag;
bool ok = false;
// Block until the next result is available in the completion queue "cq".
while(cq_.Next(&got_tag, &ok))
{
// The tag in this example is the memory location of the call object
AsyncClientCall* call = static_cast<AsyncClientCall*>(got_tag);
// Verify that the request was completed successfully. Note that "ok"
// corresponds solely to the request for updates introduced by Finish().
GPR_ASSERT(ok);
if(call->status.ok())
{
// std::cout << "Greeter received: " << call->reply.batch_id() << std::endl;
}
else
{
std::cout << "RPC failed" << std::endl;
}
// Once we're complete, deallocate the call object.
delete call;
}
}
void Shutdown() { cq_.Shutdown(); }
private:
// struct for keeping state and data information
struct AsyncClientCall
{
// Container for the data we expect from the server.
BatchPredictions reply;
// Context for the client. It could be used to convey extra information to
// the server and/or tweak certain RPC behaviors.
ClientContext context;
// Storage for the status of the RPC upon completion.
Status status;
std::unique_ptr<ClientAsyncResponseReader<BatchPredictions>> response_reader;
};
// Out of the passed in Channel comes the stub, stored here, our view of the
// server's exposed services.
std::unique_ptr<Inference::Stub> stub_;
// The producer-consumer queue we use to communicate asynchronously with the
// gRPC runtime.
CompletionQueue cq_;
};
DEFINE_int32(count, 500, "number of grpc messages to send");
DEFINE_int32(batch_size, 1, "batch_size");
DEFINE_int32(port, 50051, "server_port");
int main(int argc, char** argv)
{
FLAGS_alsologtostderr = 1; // It will dump to console
::google::ParseCommandLineFlags(&argc, &argv, true);
// Instantiate the client. It requires a channel, out of which the actual RPCs
// are created. This channel models a connection to an endpoint (in this case,
// localhost at port 50051). We indicate that the channel isn't authenticated
// (use of InsecureChannelCredentials()).
std::ostringstream ip_port;
ip_port << "localhost:" << FLAGS_port;
GreeterClient greeter(grpc::CreateChannel(ip_port.str(), grpc::InsecureChannelCredentials()));
// Spawn reader thread that loops indefinitely
std::thread thread_ = std::thread(&GreeterClient::AsyncCompleteRpc, &greeter);
auto start = std::chrono::steady_clock::now();
for(size_t i = 0; i < FLAGS_count; i++)
{
greeter.SayHello(i, FLAGS_batch_size); // The actual RPC call!
}
greeter.Shutdown();
thread_.join(); // wait for the completion-queue reader thread to drain and exit
auto end = std::chrono::steady_clock::now();
float elapsed = std::chrono::duration<float>(end - start).count();
std::cout << FLAGS_count << " requests in " << elapsed
<< "seconds; inf/sec: " << FLAGS_count * FLAGS_batch_size / elapsed << std::endl;
return 0;
}
================================================
FILE: examples/02_TensorRT_GRPC/src/metrics.cc
================================================
/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "metrics.h"
#include <glog/logging.h>
#include <ostream>
namespace trtlab {
void Metrics::Initialize(uint32_t port)
{
auto singleton = GetSingleton();
if(singleton->m_Exposer)
{
LOG(WARNING) << "Metrics already initialized. This call is ignored";
return;
}
std::ostringstream stream;
stream << "0.0.0.0:" << port;
singleton->m_Exposer = std::make_unique<Exposer>(stream.str());
singleton->m_Exposer->RegisterCollectable(singleton->m_Registry);
}
auto Metrics::GetRegistry() -> Registry&
{
auto singleton = Metrics::GetSingleton();
return *(singleton->m_Registry);
}
Metrics* Metrics::GetSingleton()
{
static Metrics singleton;
return &singleton;
}
Metrics::Metrics() : m_Registry(std::make_shared<Registry>()) {}
Metrics::~Metrics() {}
} // namespace trtlab
================================================
FILE: examples/02_TensorRT_GRPC/src/metrics.h
================================================
/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <prometheus/exposer.h>
#include <prometheus/registry.h>
using prometheus::Exposer;
using prometheus::Registry;
namespace trtlab {
class Metrics
{
public:
static void Initialize(uint32_t port);
static auto GetRegistry() -> Registry&;
protected:
Metrics();
virtual ~Metrics();
static Metrics* GetSingleton();
private:
std::unique_ptr<Exposer> m_Exposer;
std::shared_ptr<Registry> m_Registry;
};
} // namespace trtlab
================================================
FILE: examples/02_TensorRT_GRPC/src/server.cc
================================================
/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "nvml.h"
#include <chrono>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/stat.h>
#include <thread>
#include <unistd.h>
#include "tensorrt/laboratory/core/affinity.h"
#include "tensorrt/laboratory/core/memory/allocator.h"
#include "tensorrt/laboratory/cuda/device_info.h"
#include "tensorrt/laboratory/cuda/memory/cuda_pinned_host.h"
#include "tensorrt/laboratory/inference_manager.h"
#include "tensorrt/laboratory/runtime.h"
#include "nvrpc/context.h"
#include "nvrpc/executor.h"
#include "nvrpc/server.h"
#include "nvrpc/service.h"
#include "metrics.h"
using nvrpc::AsyncRPC;
using nvrpc::AsyncService;
using nvrpc::Context;
using nvrpc::Executor;
using nvrpc::Server;
using trtlab::Affinity;
using trtlab::Allocator;
using trtlab::CudaPinnedHostMemory;
using trtlab::DeviceInfo;
using trtlab::Metrics;
using trtlab::ThreadPool;
using trtlab::TensorRT::InferenceManager;
using trtlab::TensorRT::ManagedRuntime;
using trtlab::TensorRT::Model;
using trtlab::TensorRT::Runtime;
using trtlab::TensorRT::StandardRuntime;
// Flowers Protos
#include "inference.grpc.pb.h"
#include "inference.pb.h"
using ssd::BatchInput;
using ssd::BatchPredictions;
using ssd::Inference;
/*
* Prometheus Metrics
*
* It is important to collect measurements to find bottlenecks and performance issues,
* and to trigger auto-scaling.
*/
static auto& registry = Metrics::GetRegistry();
// Summaries - Request and Compute duration on a per service basis
static auto& inf_compute =
prometheus::BuildSummary().Name("yais_inference_compute_duration_ms").Register(registry);
static auto& inf_request =
prometheus::BuildSummary().Name("yais_inference_request_duration_ms").Register(registry);
static const auto& quantiles =
prometheus::Summary::Quantiles{{0.5, 0.05}, {0.90, 0.01}, {0.99, 0.001}};
// Histogram - Load Ratio = Request/Compute duration - should be just above one for a service
// that can keep up with its current load. This metric provides more
// detailed information on the impact of the queue depth because it accounts
// for request time.
static const std::vector<double> buckets = {1.25, 1.50, 2.0, 10.0, 100.0}; // unitless
static auto& inf_load_ratio_fam =
prometheus::BuildHistogram().Name("yais_inference_load_ratio").Register(registry);
static auto& inf_load_ratio = inf_load_ratio_fam.Add({}, buckets);
// Gauge - Periodically measure and report GPU power utilization. As the load increases
// on the service, the power should increase proportionally, until the power is capped
// either by device limits or compute resources. At this level, the inf_load_ratio
// will begin to increase under further increases in traffic
static auto& power_gauge_fam =
prometheus::BuildGauge().Name("yais_gpus_power_usage").Register(registry);
static auto& power_gauge = power_gauge_fam.Add({{"gpu", "0"}});
/*
* External Data Source
*
* Attaches to a System V shared memory segment owned by an external resource.
* Example: the results of an image decode service could use this mechanism to transfer
* large tensors to an inference service by simply passing an offset.
*/
float* GetSharedMemory(const std::string& address);
/*
* YAIS Resources - TensorRT InferenceManager + ThreadPools + External Datasource
*/
class FlowersResources : public InferenceManager
{
public:
explicit FlowersResources(int max_executions, int max_buffers, int nCuda, int nResp,
float* sysv_data)
: InferenceManager(max_executions, max_buffers), m_CudaThreadPool(nCuda),
m_ResponseThreadPool(nResp), m_SharedMemory(sysv_data)
{
}
ThreadPool& GetCudaThreadPool() { return m_CudaThreadPool; }
ThreadPool& GetResponseThreadPool() { return m_ResponseThreadPool; }
float* GetSysvOffset(size_t offset_in_bytes)
{
return &m_SharedMemory[offset_in_bytes / sizeof(float)];
}
private:
ThreadPool m_CudaThreadPool;
ThreadPool m_ResponseThreadPool;
float* m_SharedMemory;
};
/*
* nvRPC Context - Defines the logic of the RPC.
*/
class FlowersContext final : public Context<BatchInput, BatchPredictions, FlowersResources>
{
void ExecuteRPC(RequestType& input, ResponseType& output) final override
{
// Executing on a Executor threads - we don't want to block message handling, so we offload
GetResources()->GetCudaThreadPool().enqueue([this, &input, &output]() {
// Executed on a thread from CudaThreadPool
auto model = GetResources()->GetModel("flowers");
auto buffers = GetResources()->GetBuffers(); // <=== Limited Resource; May Block !!!
auto bindings = buffers->CreateBindings(model);
bindings->SetBatchSize(input.batch_size());
bindings->SetHostAddress(0, GetResources()->GetSysvOffset(input.sysv_offset()));
bindings->CopyToDevice(bindings->InputBindings());
auto ctx =
GetResources()->GetExecutionContext(model); // <=== Limited Resource; May Block !!!
ctx->Infer(bindings);
bindings->CopyFromDevice(bindings->OutputBindings());
// All Async CUDA work has been queued - this thread's work is done.
GetResources()->GetResponseThreadPool().enqueue([this, &input, &output, model, bindings,
ctx]() mutable {
// Executed on a thread from ResponseThreadPool
auto compute_time = ctx->Synchronize();
ctx.reset(); // Finished with the Execution Context - Release it to competing
// threads
bindings->Synchronize(); // Blocks on H2D, Compute, D2H Pipeline
WriteBatchPredictions(input, output, (float*)bindings->HostAddress(1));
bindings.reset(); // Finished with Buffers - Release it to competing threads
auto request_time = Walltime();
output.set_compute_time(static_cast<float>(compute_time));
output.set_total_time(static_cast<float>(request_time));
this->FinishResponse();
// The Response is now sending; Record some metrics and be done
inf_compute.Add({{"model", model->Name()}}, quantiles).Observe(compute_time * 1000);
inf_request.Add({{"model", model->Name()}}, quantiles).Observe(request_time * 1000);
inf_load_ratio.Observe(request_time / compute_time);
});
});
}
void WriteBatchPredictions(RequestType& input, ResponseType& output, float* scores)
{
int N = input.batch_size();
auto nClasses = GetResources()->GetModel("flowers")->GetBinding(1).elementsPerBatchItem;
size_t cntr = 0;
for(int p = 0; p < N; p++)
{
auto element = output.add_elements();
/* Customize the post-processing of the output tensor *\
float max_val = -1.0;
int max_idx = -1;
for (int i = 0; i < nClasses; i++)
{
if (max_val < scores[cntr])
{
max_val = scores[cntr];
max_idx = i;
}
cntr++;
}
auto top1 = element->add_predictions();
top1->set_class_id(max_idx);
top1->set_score(max_val);
\* Customize the post-processing of the output tensor */
}
output.set_batch_id(input.batch_id());
}
};
static bool ValidateEngine(const char* flagname, const std::string& value)
{
struct stat buffer;
return (stat(value.c_str(), &buffer) == 0);
}
static bool ValidateBytes(const char* flagname, const std::string& value)
{
trtlab::StringToBytes(value);
return true;
}
DEFINE_string(engine, "/path/to/tensorrt.engine", "TensorRT serialized engine");
DEFINE_validator(engine, &ValidateEngine);
DEFINE_string(dataset, "127.0.0.1:4444", "GRPC Dataset/SharedMemory Service Address");
DEFINE_int32(contexts, 1, "Number of Execution Contexts");
DEFINE_int32(buffers, 0, "Number of Input/Output Buffers");
DEFINE_string(runtime, "default", "TensorRT Runtime");
DEFINE_int32(execution_threads, 1, "Number of RPC execution threads");
DEFINE_int32(preprocessing_threads, 0, "Number of preprocessing threads");
DEFINE_int32(kernel_launching_threads, 1, "Number of threads to launch CUDA kernels");
DEFINE_int32(postprocessing_threads, 2, "Number of postprocessing threads");
DEFINE_string(max_recv_bytes, "10MiB", "Maximum number of bytes for incoming messages");
DEFINE_validator(max_recv_bytes, &ValidateBytes);
DEFINE_int32(port, 50051, "Port to listen for gRPC requests");
DEFINE_int32(metrics, 50078, "Port to expose metrics for scraping");
int main(int argc, char* argv[])
{
FLAGS_alsologtostderr = 1; // Log to console
::google::InitGoogleLogging("flowers");
::google::ParseCommandLineFlags(&argc, &argv, true);
// Set CPU Affinity to be near the GPU
auto cpus = DeviceInfo::Affinity(0);
Affinity::SetAffinity(cpus);
// Enable metrics on port
Metrics::Initialize(FLAGS_metrics);
// Create a gRPC server bound to IP:PORT
std::ostringstream ip_port;
ip_port << "0.0.0.0:" << FLAGS_port;
Server server(ip_port.str());
// Modify MaxReceiveMessageSize
auto bytes = trtlab::StringToBytes(FLAGS_max_recv_bytes);
server.Builder().SetMaxReceiveMessageSize(bytes);
LOG(INFO) << "gRPC MaxReceiveMessageSize = " << trtlab::BytesToString(bytes);
// A server can host multiple services
LOG(INFO) << "Register Service (flowers::Inference) with Server";
auto inferenceService = server.RegisterAsyncService<Inference>();
// An RPC has two components that need to be specified when registering with the service:
// 1) Type of Execution Context (FlowersContext). The execution context defines the behavior
// of the RPC, i.e. it contains the control logic for the execution of the RPC.
// 2) The Request function (RequestCompute) which was generated by gRPC when compiling the
// protobuf which defined the service. This function is responsible for queuing the
//    RPC's execution context with the gRPC completion queue.
LOG(INFO) << "Register RPC (flowers::Inference::Compute) with Service (flowers::Inference)";
auto rpcCompute =
inferenceService->RegisterRPC<FlowersContext>(&Inference::AsyncService::RequestCompute);
// Buffers default to execution contexts + 2
// Allows for 1 H2D, N TensorRT Executions, 1 D2H to be inflight
auto buffers = FLAGS_buffers;
if(buffers == 0) buffers = FLAGS_contexts + 2;
// Initialize Resources
LOG(INFO) << "Initializing Resources for RPC (flowers::Inference::Compute)";
auto rpcResources = std::make_shared<FlowersResources>(
FLAGS_contexts, // number of IExecutionContexts - scratch space for DNN activations
buffers, // number of host/device buffers for input/output tensors
FLAGS_kernel_launching_threads, // number of threads used to execute cuda kernel launches
FLAGS_postprocessing_threads, // number of threads used to write and complete responses
GetSharedMemory(FLAGS_dataset) // pointer to data in shared memory
);
std::shared_ptr<Runtime> runtime;
if(FLAGS_runtime == "default")
{
runtime = std::make_shared<StandardRuntime>();
}
else if(FLAGS_runtime == "unified")
{
runtime = std::make_shared<ManagedRuntime>();
}
else
{
LOG(FATAL) << "Invalid TensorRT Runtime";
}
rpcResources->RegisterModel("flowers", runtime->DeserializeEngine(FLAGS_engine));
rpcResources->AllocateResources();
// Create Executors - Executors provide the messaging processing resources for the RPCs
LOG(INFO) << "Initializing Executor";
auto executor = server.RegisterExecutor(new Executor(1));
// You can register RPC execution contexts from any registered RPC on any executor.
LOG(INFO)
<< "Registering Execution Contexts for RPC (flowers::Inference::Compute) with Executor";
executor->RegisterContexts(rpcCompute, rpcResources, 100);
LOG(INFO) << "Running Server";
server.Run(std::chrono::milliseconds(2000), [] {
// Query GPU Power
nvmlDevice_t gpu;
unsigned int power;
CHECK_EQ(nvmlDeviceGetHandleByIndex(0, &gpu), NVML_SUCCESS)
<< "Failed to get Device for index=" << 0;
CHECK_EQ(nvmlDeviceGetPowerUsage(gpu, &power), NVML_SUCCESS)
<< "Failed to get Power Usage for GPU=" << 0;
power_gauge.Set((double)power * 0.001);
});
}
static auto pinned_memory = std::make_unique<Allocator<CudaPinnedHostMemory>>(1024 * 1024 * 1024);
float* GetSharedMemory(const std::string& address)
{
/* data in shared memory should go here - for the sake of a quick example, just use an empty
* array */
pinned_memory->Fill((char)0);
return (float*)pinned_memory->Data();
// the following code connects to a shared memory service to allow for non-serialized transfers
// between microservices
/*
InfoRequest request;
Info reply;
grpc::ClientContext context;
auto channel = grpc::CreateChannel(address, grpc::InsecureChannelCredentials());
auto stub = SharedMemoryDataSet::NewStub(channel);
auto status = stub->GetInfo(&context, request, &reply);
CHECK(status.ok()) << "Dataset shared memory request failed";
DLOG(INFO) << "SysV ShmKey: " << reply.sysv_key();
int shmid = shmget(reply.sysv_key(), 0, 0);
DLOG(INFO) << "SysV ShmID: " << shmid;
float* data = (float*) shmat(shmid, 0, 0);
CHECK(data) << "SysV Attached failed";
return data;
*/
}
================================================
FILE: examples/02_TensorRT_GRPC/src/siege.cc
================================================
/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Original Copyright provided below.
* This work extends the original gRPC client examples to work with the
* implemented server.
*
* Copyright 2015 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
#include <chrono>
#include <iostream>
#include <memory>
#include <string>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <grpc/support/log.h>
#include <grpcpp/grpcpp.h>
#include <thread>
#include "inference.grpc.pb.h"
#include "tensorrt/laboratory/core/utils.h"
using grpc::Channel;
using grpc::ClientAsyncResponseReader;
using grpc::ClientContext;
using grpc::CompletionQueue;
using grpc::Status;
using ssd::BatchInput;
using ssd::BatchPredictions;
using ssd::Inference;
static int g_BatchSize = 1;
class GreeterClient
{
public:
explicit GreeterClient(std::shared_ptr<Channel> channel, int max_outstanding)
: stub_(Inference::NewStub(channel)), m_OutstandingMessageCount(0),
m_MaxOutstandingMessageCount(max_outstanding), m_TotalRequestTime(0.0f), m_RequestCalls(0)
{
}
// Assembles the client's payload and sends it to the server.
void SayHello(const size_t batch_id, const int batch_size, char* bytes, uint64_t total)
{
// Data we are sending to the server.
{
std::unique_lock<std::mutex> lock(m_Mutex);
m_OutstandingMessageCount++;
while(m_OutstandingMessageCount >= m_MaxOutstandingMessageCount)
{
LOG_FIRST_N(WARNING, 10) << "Initiated Backoff - (Siege Rate > Server Compute "
"Rate) - Server Queues are full.";
m_Condition.wait(lock);
}
}
auto start = std::chrono::high_resolution_clock::now();
BatchInput request;
request.set_batch_id(batch_id);
request.set_batch_size(batch_size);
if(total)
{
request.set_data(bytes, total);
}
// Call object to store rpc data
AsyncClientCall* call = new AsyncClientCall;
// stub_->PrepareAsyncCompute() creates an RPC object, returning
// an instance to store in "call" but does not actually start the RPC
// Because we are using the asynchronous API, we need to hold on to
// the "call" instance in order to get updates on the ongoing RPC.
call->response_reader = stub_->PrepareAsyncCompute(&call->context, request, &cq_);
// StartCall initiates the RPC call
call->response_reader->StartCall();
// Request that, upon completion of the RPC, "reply" be updated with the
// server's response; "status" with the indication of whether the operation
// was successful. Tag the request with the memory address of the call object.
call->response_reader->Finish(&call->reply, &call->status, (void*)call);
float elapsed =
std::chrono::duration<float>(std::chrono::high_resolution_clock::now() - start).count();
m_RequestCalls++;
m_TotalRequestTime += elapsed;
// LOG_EVERY_N(INFO, 200) << "Request overhead: " << m_TotalRequestTime/m_RequestCalls;
}
// Loop while listening for completed responses.
// Prints out the response from the server.
void AsyncCompleteRpc()
{
void* got_tag;
bool ok = false;
size_t cntr = 0;
auto start = std::chrono::steady_clock::now();
float last = 0.0;
// Block until the next result is available in the completion queue "cq".
while(cq_.Next(&got_tag, &ok))
{
// The tag in this example is the memory location of the call object
AsyncClientCall* call = static_cast<AsyncClientCall*>(got_tag);
// Verify that the request was completed successfully. Note that "ok"
// corresponds solely to the request for updates introduced by Finish().
GPR_ASSERT(ok);
if(call->status.ok())
{
// std::cout << "Greeter received: " << call->reply.batch_id() << std::endl;
}
else
{
std::cout << "RPC failed" << std::endl;
}
// Once we're complete, deallocate the call object.
delete call;
cntr++;
float elapsed =
std::chrono::duration<float>(std::chrono::steady_clock::now() - start).count();
if(elapsed - last > 0.5)
{
LOG(INFO) << "avg. rate: " << (float)cntr / (elapsed - last) << "( "
<< (float)(cntr * g_BatchSize) / (elapsed - last) << " inf/sec)";
last = elapsed;
cntr = 0;
}
{
std::unique_lock<std::mutex> lock(m_Mutex);
m_OutstandingMessageCount--;
}
m_Condition.notify_one();
}
}
void Shutdown() { cq_.Shutdown(); }
private:
// struct for keeping state and data information
struct AsyncClientCall
{
// Container for the data we expect from the server.
BatchPredictions reply;
// Context for the client. It could be used to convey extra information to
// the server and/or tweak certain RPC behaviors.
ClientContext context;
// Storage for the status of the RPC upon completion.
Status status;
std::unique_ptr<ClientAsyncResponseReader<BatchPredictions>> response_reader;
};
// Out of the passed in Channel comes the stub, stored here, our view of the
// server's exposed services.
std::unique_ptr<Inference::Stub> stub_;
// The producer-consumer queue we use to communicate asynchronously with the
// gRPC runtime.
CompletionQueue cq_;
// mutex to help control rate
std::mutex m_Mutex;
std::condition_variable m_Condition;
int m_OutstandingMessageCount;
int m_MaxOutstandingMessageCount;
float m_TotalRequestTime;
size_t m_RequestCalls;
};
static bool ValidateBytes(const char* flagname, const std::string& value)
{
trtlab::StringToBytes(value);
return true;
}
DEFINE_int32(count, 1000000, "number of grpc messages to send");
DEFINE_int32(batch_size, 1, "batch_size");
DEFINE_int32(max_outstanding, 950, "maximum outstanding requests");
DEFINE_int32(port, 50051, "server_port");
DEFINE_double(rate, 1.0, "messages per second");
DEFINE_double(max_rate, 100000, "maximum number of messages per second when func is applied");
DEFINE_double(alpha, 0, "alpha");
DEFINE_double(beta, 1, "beta");
DEFINE_string(func, "constant", "constant, linear or cyclic");
DEFINE_string(bytes, "0B", "add extra bytes to the request payload");
DEFINE_validator(bytes, &ValidateBytes);
int main(int argc, char** argv)
{
FLAGS_alsologtostderr = 1; // It will dump to console
::google::ParseCommandLineFlags(&argc, &argv, true);
g_BatchSize = FLAGS_batch_size;
auto bytes = trtlab::StringToBytes(FLAGS_bytes);
char extra_bytes[bytes];
if(bytes)
LOG(INFO) << "Sending an addition " << trtlab::BytesToString(bytes)
<< " bytes in request payload";
// using a fixed overhead of 15us per rpc call. I could adjust dynamically since I'm tracking
// the call overhead, but it's close enough.
auto start = std::chrono::system_clock::now();
auto walltime = [start]() -> double {
return std::chrono::duration<double>(std::chrono::system_clock::now() - start).count();
};
std::map<std::string, std::function<double()>> rates_by_name;
rates_by_name["constant"] = []() -> double { return std::min(FLAGS_rate, FLAGS_max_rate); };
rates_by_name["linear"] = [start, walltime]() -> double {
return std::min(FLAGS_rate + (FLAGS_alpha / 60.0) * walltime(), FLAGS_max_rate);
};
rates_by_name["cyclic"] = [start, walltime]() -> double {
return std::min(FLAGS_rate + FLAGS_alpha *
std::sin(2.0 * 3.14159 * (FLAGS_beta / 60.0) * walltime()),
FLAGS_max_rate);
};
auto search = rates_by_name.find(FLAGS_func);
if(search == rates_by_name.end())
{
LOG(FATAL) << "--func must be constant, linear or cyclic; your value = " << FLAGS_func;
}
auto sleepy = [search]() -> double {
auto sleep_time = ((std::chrono::seconds(1) / std::max((search->second)(), 2.0))) -
std::chrono::microseconds(15);
return std::chrono::duration<double>(sleep_time).count();
};
// Instantiate the client. It requires a channel, out of which the actual RPCs
// are created. This channel models a connection to an endpoint (in this case,
// localhost at port 50051). We indicate that the channel isn't authenticated
// (use of InsecureChannelCredentials()).
std::ostringstream ip_port;
ip_port << "localhost:" << FLAGS_port;
grpc::ChannelArguments ch_args;
ch_args.SetMaxReceiveMessageSize(-1);
GreeterClient greeter(
grpc::CreateCustomChannel(ip_port.str(), grpc::InsecureChannelCredentials(), ch_args),
FLAGS_max_outstanding);
// Spawn reader thread that loops indefinitely
std::thread thread_ = std::thread(&GreeterClient::AsyncCompleteRpc, &greeter);
for(size_t i = 0; i < FLAGS_count; i++)
{
greeter.SayHello(i, FLAGS_batch_size, extra_bytes, bytes); // The actual RPC call!
auto start = std::chrono::high_resolution_clock::now();
while(std::chrono::duration<float>(std::chrono::high_resolution_clock::now() - start)
.count() < sleepy())
{
std::this_thread::yield();
}
}
greeter.Shutdown();
thread_.join(); // wait for the completion-queue reader thread to drain and exit
auto elapsed = walltime();
std::cout << FLAGS_count << " requests in " << elapsed
<< "seconds; inf/sec: " << FLAGS_count * FLAGS_batch_size / elapsed << std::endl;
return 0;
}
================================================
FILE: examples/02_TensorRT_GRPC/src/sync-client.cc
================================================
/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Original Copyright proivded below.
* This work extends the original gRPC client examples to work with the
* implemented server.
*
* Copyright 2015 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
#include <chrono>
#include <iostream>
#include <memory>
#include <string>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <grpcpp/grpcpp.h>
#include "inference.grpc.pb.h"
using grpc::Channel;
using grpc::ClientContext;
using grpc::Status;
using ssd::BatchInput;
using ssd::BatchPredictions;
using ssd::Inference;
class SimpleClient
{
public:
SimpleClient(std::shared_ptr<Channel> channel) : stub_(Inference::NewStub(channel)) {}
// Assembles the client's payload, sends it and presents the response back
// from the server.
int Compute(const int batch_id, const int batch_size)
{
// Data we are sending to the server.
BatchInput request;
request.set_batch_id(batch_id);
request.set_batch_size(batch_size);
// Container for the data we expect from the server.
BatchPredictions reply;
// Context for the client. It could be used to convey extra information to
// the server and/or tweak certain RPC behaviors.
ClientContext context;
// The actual RPC.
Status status = stub_->Compute(&context, request, &reply);
// Act upon its status.
if(status.ok())
{
return reply.batch_id();
}
else
{
std::cout << status.error_code() << ": " << status.error_message() << std::endl;
return -1;
}
}
private:
std::unique_ptr<Inference::Stub> stub_;
};
DEFINE_int32(count, 1000, "number of grpc messages to send");
DEFINE_int32(port, 50051, "server_port");
DEFINE_int32(batch, 1, "batch size");
int main(int argc, char** argv)
{
// Instantiate the client. It requires a channel, out of which the actual RPCs
// are created. This channel models a connection to an endpoint (in this case,
// localhost at port 50051). We indicate that the channel isn't authenticated
// (use of InsecureChannelCredentials()).
FLAGS_alsologtostderr = 1; // It will dump to console
::google::ParseCommandLineFlags(&argc, &argv, true);
std::ostringstream ip_port;
ip_port << "localhost:" << FLAGS_port;
SimpleClient client(grpc::CreateChannel(ip_port.str(), grpc::InsecureChannelCredentials()));
auto start = std::chrono::steady_clock::now();
for(int i = 0; i < FLAGS_count; i++)
{
auto reply = client.Compute(i, FLAGS_batch);
if(reply == -1 || reply != i) std::cout << "BatchId received: " << reply << std::endl;
}
auto end = std::chrono::steady_clock::now();
float elapsed = std::chrono::duration<float>(end - start).count();
std::cout << FLAGS_count << " requests in " << elapsed
<< " seconds; inf/sec: " << FLAGS_count * FLAGS_batch / elapsed << std::endl;
return 0;
}
================================================
FILE: examples/03_Batching/CMakeLists.txt
================================================
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add_executable(streaming-service-echo.x
streaming-service.cc
)
target_link_libraries(streaming-service-echo.x
trtlab::nvrpc
echo-protos
gflags
)
add_executable(batching-service-echo.x
inference-batcher.cc
)
target_link_libraries(batching-service-echo.x
trtlab::nvrpc
echo-protos
gflags
)
================================================
FILE: examples/03_Batching/README.md
================================================
# Batching Service
A batching service is a service that tries to collect sets of similar requests into a
collective batch which can be executed in a single shot.
#### Why do we want to batch?
In the case of Deep Neural Networks, batching can improve the computational efficiency
of executing on a GPU by increasing the operational intensity, i.e. improving the ratio of
math operations per memory transaction. This translates to improved
throughput, better hardware utilization, and cost reductions.
#### Sounds great, but what's the catch?
In many cases, batching adds latency to an individual request. Because a batch of more
than one item (BatchN) is computed as a single unit, the time to compute BatchN is greater than
the time to compute Batch1. However, the compute-time delta between Batch1 and Batch2/4/8 is
often fairly small due to the improved operational efficiency.
Secondly, because batching requires requests to be collected, there is a timed collection
window prior to the compute. The first request in a batch sees the longest latency.
The worst-case increased latency is bounded by the following formula:
```
worst_additional_latency = batch_window_timeout + batchN_compute - batch1_compute
```
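For example, with purely illustrative timings (a 2000 us batching window, which is the batcher's
default `--timeout_usecs`, a 1.5 ms BatchN compute, and a 1.0 ms Batch1 compute), the bound
works out to:
```
worst_additional_latency = 2000us + 1500us - 1000us = 2500us (2.5 ms)
```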
#### When to Batch?
You want to batch requests when your service is under very high load and you can tolerate
minor increases in latency.
Throughput improvements can be 2-5x which translates into direct cost savings.
#### What does this Batching Service do for me?
The basic YAIS service examples [01_Basic_GRPC](../01_Basic_GRPC) and [02_TensorRT_GRPC](../02_TensorRT_GRPC)
implement high-performance send/recv unary services. That is, the client
sends a request which is computed and a response is returned. The client could in theory
create a single message that is itself a batch, i.e. multiple image files or sentences to be
translated. However, in most common real-world use cases, the clients of a service send
a single item at a time, which keeps both the logic and the lifecycle of the request simple.
If this is your RPC definition,
```
service Inference {
rpc Compute (Input) returns (Output) {}
}
```
Then, instead of implementing `rpc Compute` to perform the inference computation, we
hijack that RPC and turn it into a batcher. In the [`inference-batcher.cc`](inference-batcher.cc)
file, you will see that we implement our batching service as the `Compute` method.
The batching service collects incoming `Input` requests and forwards them via a gRPC stream
to a service that accepts a "batching stream".
A “batching stream” is a stream where the endpoint service reads and collects the elements of the stream until the client signifies it is done writing. That is the signal at which YAIS performs a single batched inference call on the concatenated set of requests that came in over the stream. After the inference calculation is complete, the server writes the results for each request item to the stream. That is, for each request that came in on the stream, the server is expected to return a response.
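The bundled [`simple_batching_client.py`](simple_batching_client.py) exercises this stream directly
from Python. A condensed sketch of the contract, using the generated `simple_pb2` /
`simple_pb2_grpc` modules in this directory (the `forward_batch` helper is hypothetical and only
for illustration):
```
import grpc
import simple_pb2
import simple_pb2_grpc

def forward_batch(stub, requests):
    # Write every collected request on one stream, close it for writing
    # (the iterator is exhausted), then read one response per request (FIFO).
    return list(stub.BatchedCompute(iter(requests)))

channel = grpc.insecure_channel("localhost:50051")
stub = simple_pb2_grpc.InferenceStub(channel)
batch = [simple_pb2.Input(batch_id=i) for i in range(4)]
for out in forward_batch(stub, batch):
    print("received batch_id={}".format(out.batch_id))
```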
We still need to compute inference on the batching stream. This is performed by [streaming-service.cc](streaming-service.cc).
The `streaming-service` implements the `BatchedCompute` RPC method using a `BatchingContext`.
```
service Inference {
rpc Compute (Input) returns (Output) {}
rpc BatchedCompute (stream Input) returns (stream Output) {}
}
```
Because the stream consists of an array of individual messages, you simply need to make
minor modifications to your existing Batch1 service to preprocess and concatenate the incoming
requests into a single batched compute. For each `Input` item in the stream, the service is
expected to write an `Output` response in the same order as the inputs (FIFO).
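The C++ streaming service in this example implements that contract with a `BatchingContext`;
purely for illustration, the same FIFO contract could be sketched in Python with the generated
`InferenceServicer` (the placeholder compute below just echoes each `batch_id`):
```
import simple_pb2
import simple_pb2_grpc

class BatchedServicer(simple_pb2_grpc.InferenceServicer):
    def BatchedCompute(self, request_iterator, context):
        # Collect the whole stream; the iterator ends when the client
        # signals it is done writing.
        batch = list(request_iterator)
        # One batched "compute" over the collected inputs (placeholder).
        results = [simple_pb2.Output(batch_id=req.batch_id) for req in batch]
        # Write one Output per Input, in the same order (FIFO).
        for out in results:
            yield out
```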
The batching service doesn’t need to know anything about the format of the `Input`/`Output` messages. It simply accepts and forwards them. The result is that this batching service example should be able to work with any unary gRPC service with any request/response message. You simply need to implement a streaming service capable of handling the forwarding stream.
## Running Example
```
./launch_batching.sh
```
```
... # streaming service startup
... # batching service startup
Starting a shell keeping the services and load-balancer running...
Try python unary_client.py - exit shell to kill services
Batching Subshell: python unary_client.py
I0822 14:48:18.900671 50 inference-batcher.cc:344] incoming unary request
I0822 14:48:18.902642 41 inference-batcher.cc:109] Client using CQ: 0x14470f0
I0822 14:48:18.902680 41 inference-batcher.cc:140] Starting Batch Forwarding of Size 1 for Tag 0x1458450
I0822 14:48:18.903472 35 streaming-service.cc:61] Recieved request with batch_id=78
I0822 14:48:18.903504 35 streaming-service.cc:54] Response with batch_id=78
I0822 14:48:18.903656 47 inference-batcher.cc:243] Batch Forwarding Completed for Tag 0x1458450
Received msg with batch_id=78
```
================================================
FILE: examples/03_Batching/inference-batcher.cc
================================================
/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <chrono>
#include <functional>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <map>
#include <queue>
#include <thread>
#include <vector>
#include "nvrpc/context.h"
#include "nvrpc/executor.h"
#include "nvrpc/server.h"
#include "tensorrt/laboratory/core/thread_pool.h"
using nvrpc::Context;
using nvrpc::Executor;
using nvrpc::Server;
using trtlab::ThreadPool;
#include "moodycamel/blockingconcurrentqueue.h"
using moodycamel::BlockingConcurrentQueue;
using moodycamel::ConsumerToken;
using moodycamel::ProducerToken;
#include "echo.grpc.pb.h"
#include "echo.pb.h"
/**
* @brief Batching Service for Unary Requests
*
* Exposes a Unary (send/recv) interface for a given RPC, but rather than
* computing the RPC, the service simply batches the incoming requests and
* forwards them via a gRPC stream to a service that implements the actual
* compute portion of the RPC.
*
* The backend compute service is not a Unary service. Rather it must
* implement the LifeCycleBatching service Context, i.e. BatchingContext.
* The other application in this folder implements the backend service.
*
* Streams are used as a forwarding mechanism because of how they interact
* with a load-balancer. Unlike unary requests which get balanced on each
* request, a stream only gets balanced when it is opened. All items of a stream
* go to the same endpoint service.
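* For example, if a batch of 8 requests were forwarded as 8 unary calls
* through a round-robin load-balancer, they could be scattered across 8
* different backends; writing them on a single stream keeps the whole batch
* together on one endpoint.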
*
* @tparam ServiceType
* @tparam Request
* @tparam Response
*/
template<class ServiceType, class Request, class Response>
struct BatchingService
{
using Callback = std::function<void(bool)>;
struct MessageType
{
Request* request;
Response* response;
Callback callback;
};
/**
* @brief Forwards incoming Unary requests via a gRPC Stream to
* a Batched Streaming Service that implements the actual RPC
*/
class Client
{
public:
using PrepareFunc =
std::function<std::unique_ptr<::grpc::ClientAsyncReaderWriter<Request, Response>>(
::grpc::ClientContext*, ::grpc::CompletionQueue*)>;
Client(PrepareFunc prepare_func, std::shared_ptr<ThreadPool> thread_pool)
: m_PrepareFunc(prepare_func), m_ThreadPool(thread_pool), m_CurrentCQ(0)
{
for(decltype(m_ThreadPool->Size()) i = 0; i < m_ThreadPool->Size(); i++)
{
LOG(INFO) << "Starting Client Progress Engine #" << i;
m_CQs.emplace_back(new ::grpc::CompletionQueue);
auto cq = m_CQs.back().get();
m_ThreadPool->enqueue([this, cq] { ProgressEngine(*cq); });
}
}
void WriteAndCloseStream(uint32_t messages_count, MessageType* messages)
{
auto cq = m_CQs[++m_CurrentCQ % m_CQs.size()].get();
LOG(INFO) << "Client using CQ: " << (void*)cq;
auto ctx = new Call;
for(uint32_t i = 0; i < messages_count; i++)
{
ctx->Push(messages[i]);
}
ctx->m_Stream = m_PrepareFunc(&ctx->m_Context, cq);
ctx->Start();
}
private:
class Call
{
public:
Call() : m_Started(false), m_NextState(&Call::StateInvalid) {}
virtual ~Call() {}
void Push(MessageType& message)
{
if(m_Started) LOG(FATAL) << "Stream started; No pushing allowed.";
m_Requests.push(message.request);
m_Responses.push(message.response);
m_CallbackByResponse[message.response] = message.callback;
}
void Start()
{
LOG(INFO) << "Starting Batch Forwarding of Size " << m_Requests.size()
<< " for Tag " << Tag();
m_NextState = &Call::StateWriteDone;
m_Stream->StartCall(Tag());
}
private:
bool RunNextState(bool ok)
{
bool ret = (this->*m_NextState)(ok);
if(!ret) DLOG(INFO) << "RunNextState returning false";
return ret;
}
void* Tag() { return static_cast<void*>(this); }
bool Fail()
{
LOG(FATAL) << "Fail";
return false;
}
void WriteNext()
{
if(m_Requests.size())
{
auto request = m_Requests.front();
m_Requests.pop();
DLOG(INFO) << "forwarding request";
m_NextState = &Call::StateWriteDone;
m_Stream->Write(*request, Tag());
}
else
{
DLOG(INFO) << "closing client stream for writing";
m_NextState = &Call::StateCloseStreamDone;
m_Stream->WritesDone(Tag());
}
}
void ReadNext()
{
if(m_Responses.size())
{
DLOG(INFO) << "waiting on response";
auto response = m_Responses.front();
m_NextState = &Call::StateReadDone;
m_Stream->Read(response, Tag());
}
else
{
DLOG(INFO) << "waiting on finished message from server";
m_NextState = &Call::StateFinishedDone;
m_Stream->Finish(&m_Status, Tag());
}
}
bool StateWriteDone(bool ok)
{
if(!ok) return Fail();
DLOG(INFO) << "request forwarded!";
WriteNext();
return true;
}
bool StateReadDone(bool ok)
{
if(!ok) return Fail();
DLOG(INFO) << "response received";
auto response = m_Responses.front();
m_Responses.pop();
auto search = m_CallbackByResponse.find(response);
if(search == m_CallbackByResponse.end())
LOG(FATAL) << "Callback for response not found";
ReadNext();
// Execute callback which will complete the unary request for this stream item
DLOG(INFO) << "triggering callback on held receive context";
search->second(true);
DLOG(INFO) << "callback completed";
return true;
}
bool StateCloseStreamDone(bool ok)
{
if(!ok) return Fail();
DLOG(INFO) << "closed client stream for writing";
ReadNext();
return true;
}
bool StateFinishedDone(bool ok)
{
if(m_Status.ok())
DLOG(INFO) << "ClientContext: " << Tag() << " finished with OK";
else
DLOG(INFO) << "ClientContext: " << Tag() << " finished with CANCELLED";
m_NextState = &Call::StateInvalid;
LOG(INFO) << "Batch Forwarding Completed for Tag " << Tag();
return false;
}
bool StateInvalid(bool ok) { LOG(FATAL) << "This should never be called"; return false; }
private:
std::queue<Request*> m_Requests;
std::queue<Response*> m_Responses;
std::map<const Response*, Callback> m_CallbackByResponse;
bool (Call::*m_NextState)(bool);
::grpc::Status m_Status;
::grpc::ClientContext m_Context;
std::unique_ptr<::grpc::ClientAsyncReaderWriter<Request, Response>> m_Stream;
bool m_Started;
friend class Client;
};
void ProgressEngine(::grpc::CompletionQueue& cq)
{
void* tag;
bool ok = false;
while(cq.Next(&tag, &ok))
{
CHECK(ok) << "not ok";
Call* call = static_cast<Call*>(tag);
if(!call->RunNextState(ok))
{
DLOG(INFO) << "Deleting Stream: " << tag;
delete call;
}
}
}
int m_CurrentCQ;
PrepareFunc m_PrepareFunc;
std::shared_ptr<ThreadPool> m_ThreadPool;
std::vector<std::unique_ptr<::grpc::CompletionQueue>> m_CQs;
};
class Resources : public ::trtlab::Resources
{
public:
Resources(uint32_t max_batch_size, uint64_t timeout, std::shared_ptr<Client> client)
: m_MaxBatchsize(max_batch_size), m_Timeout(timeout), m_Client(client)
{
}
virtual void PreprocessRequest(Request* req) {}
void Push(Request* req, Response* resp, Callback callback)
{
// thread_local ProducerToken token(m_MessageQueue);
// m_MessageQueue.enqueue(token, MessageType(req, resp, callback));
PreprocessRequest(req);
m_MessageQueue.enqueue(MessageType{req, resp, callback});
}
void ProgressEngine()
{
constexpr uint64_t quanta = 100;
const double timeout = static_cast<double>(m_Timeout - quanta) / 1000000.0;
size_t total_count;
size_t max_batch;
std::vector<MessageType> messages;
messages.resize(m_MaxBatchsize);
thread_local ConsumerToken token(m_MessageQueue);
for(;;)
{
max_batch = m_MaxBatchsize;
total_count = 0;
auto start = std::chrono::steady_clock::now();
auto elapsed = [start]() -> double {
return std::chrono::duration<double>(std::chrono::steady_clock::now() - start)
.count();
};
// initial pull - if not successful, restart the loop
// if successful, continue to collect requests until max_batch_size is reached
// or the timeout is triggered, then open a stream, forward the collected
// messages, and close the stream for writing
do
{
auto count = m_MessageQueue.wait_dequeue_bulk_timed(
token, &messages[total_count], max_batch, quanta);
total_count += count;
max_batch -= count;
} while(total_count && total_count < m_MaxBatchsize && elapsed() < timeout);
if(total_count)
{
m_Client->WriteAndCloseStream(total_count, messages.data());
}
}
}
private:
size_t m_MaxBatchsize;
uint64_t m_Timeout;
std::shared_ptr<Client> m_Client;
BlockingConcurrentQueue<MessageType> m_MessageQueue;
};
class ReceiveContext final : public ::nvrpc::Context<Request, Response, Resources>
{
void ExecuteRPC(Request& request, Response& response) final override
{
LOG(INFO) << "incoming unary request";
this->GetResources()->Push(&request, &response, [this](bool ok) {
if(ok)
this->FinishResponse();
else
{
LOG(INFO) << "shoot";
this->CancelResponse();
}
});
}
};
};
DEFINE_uint32(max_batch_size, 8, "Maximum batch size to collect and forward");
DEFINE_uint64(timeout_usecs, 2000, "Batching window timeout in microseconds");
DEFINE_uint32(max_batches_in_flight, 1, "Maximum number of forwarded batches");
DEFINE_uint32(receiving_threads, 1, "Number of Receiving threads");
DEFINE_uint32(forwarding_threads, 1, "Number of Forwarding threads");
DEFINE_string(forwarding_target, "localhost:50051", "Batched Compute Service / Load-Balancer");
using InferenceBatchingService = BatchingService<simple::Inference, simple::Input, simple::Output>;
int main(int argc, char* argv[])
{
FLAGS_alsologtostderr = 1; // Log to console
::google::InitGoogleLogging("simpleBatchingService");
::google::ParseCommandLineFlags(&argc, &argv, true);
auto forwarding_threads = std::make_shared<ThreadPool>(FLAGS_forwarding_threads);
auto channel = grpc::CreateChannel(FLAGS_forwarding_target, grpc::InsecureChannelCredentials());
auto stub = ::simple::Inference::NewStub(channel);
auto forwarding_prepare_func = [&stub](::grpc::ClientContext * context,
::grpc::CompletionQueue * cq) -> auto
{
return std::move(stub->PrepareAsyncBatchedCompute(context, cq));
};
auto client = std::make_shared<InferenceBatchingService::Client>(forwarding_prepare_func,
forwarding_threads);
auto rpcResources = std::make_shared<InferenceBatchingService::Resources>(
FLAGS_max_batch_size, FLAGS_timeout_usecs, client);
Server server("0.0.0.0:50049");
auto recvService = server.RegisterAsyncService<::simple::Inference>();
auto rpcCompute = recvService->RegisterRPC<InferenceBatchingService::ReceiveContext>(
&::simple::Inference::AsyncService::RequestCompute);
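// Size the receive-context pool so max_batches_in_flight full batches can be outstanding at once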
uint64_t context_count = FLAGS_max_batch_size * FLAGS_max_batches_in_flight;
uint64_t contexts_per_executor_thread = std::max(context_count / FLAGS_receiving_threads, 1UL);
auto executor = server.RegisterExecutor(new Executor(FLAGS_receiving_threads));
executor->RegisterContexts(rpcCompute, rpcResources, contexts_per_executor_thread);
LOG(INFO) << "Running Server";
server.Run(std::chrono::milliseconds(1), [rpcResources] { rpcResources->ProgressEngine(); });
return 0;
}
================================================
FILE: examples/03_Batching/launch_batching.sh
================================================
#!/bin/bash -e
#
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
cleanup() {
kill $(jobs -p) ||:
}
trap "cleanup" EXIT SIGINT SIGTERM
sleep 1
echo "starting streaming services"
/work/build/examples/03_Batching/streaming-service-echo.x &
wait-for-it.sh localhost:50051 --timeout=0 -- echo "Streaming service is ready."
echo "starting batching service"
/work/build/examples/03_Batching/batching-service-echo.x &
wait-for-it.sh localhost:50049 --timeout=0 -- echo "Batching service is ready."
echo
echo "Starting a shell keeping the services and load-balancer running..."
echo "Try python unary_client.py - exit shell to kill services"
bash --rcfile <(echo "PS1='Batching Subshell: '")
================================================
FILE: examples/03_Batching/simple_batching_client.py
================================================
import grpc
import simple_pb2
import simple_pb2_grpc
def run():
with grpc.insecure_channel('localhost:50051') as channel:
stub = simple_pb2_grpc.InferenceStub(channel)
def requests():
messages = [simple_pb2.Input(batch_id=i) for i in range(10)]
for msg in messages:
print("Sending Stream batch_id={}".format(msg.batch_id))
yield msg
responses = stub.BatchedCompute(requests())
for resp in responses:
print("Received msg on stream with batch_id={}".format(resp.batch_id))
if __name__ == "__main__":
run()
================================================
FILE: examples/03_Batching/simple_pb2.py
================================================
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: simple.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
from google.protobuf import descriptor_pb2
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor.FileDescriptor(
name='simple.proto',
package='simple',
syntax='proto3',
serialized_pb=_b('\n\x0csimple.proto\x12\x06simple\"\x19\n\x05Input\x12\x10\n\x08\x62\x61tch_id\x18\x01 \x01(\x04\"\x1a\n\x06Output\x12\x10\n\x08\x62\x61tch_id\x18\x01 \x01(\x04\x32n\n\tInference\x12*\n\x07\x43ompute\x12\r.simple.Input\x1a\x0e.simple.Output\"\x00\x12\x35\n\x0e\x42\x61tchedCompute\x12\r.simple.Input\x1a\x0e.simple.Output\"\x00(\x01\x30\x01\x62\x06proto3')
)
_INPUT = _descriptor.Descriptor(
name='Input',
full_name='simple.Input',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='batch_id', full_name='simple.Input.batch_id', index=0,
number=1, type=4, cpp_type=4, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=24,
serialized_end=49,
)
_OUTPUT = _descriptor.Descriptor(
name='Output',
full_name='simple.Output',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='batch_id', full_name='simple.Output.batch_id', index=0,
number=1, type=4, cpp_type=4, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=51,
serialized_end=77,
)
DESCRIPTOR.message_types_by_name['Input'] = _INPUT
DESCRIPTOR.message_types_by_name['Output'] = _OUTPUT
_sym_db.RegisterFileDescriptor(DESCRIPTOR)
Input = _reflection.GeneratedProtocolMessageType('Input', (_message.Message,), dict(
DESCRIPTOR = _INPUT,
__module__ = 'simple_pb2'
# @@protoc_insertion_point(class_scope:simple.Input)
))
_sym_db.RegisterMessage(Input)
Output = _reflection.GeneratedProtocolMessageType('Output', (_message.Message,), dict(
DESCRIPTOR = _OUTPUT,
__module__ = 'simple_pb2'
# @@protoc_insertion_point(class_scope:simple.Output)
))
_sym_db.RegisterMessage(Output)
_INFERENCE = _descriptor.ServiceDescriptor(
name='Inference',
full_name='simple.Inference',
file=DESCRIPTOR,
index=0,
options=None,
serialized_start=79,
serialized_end=189,
methods=[
_descriptor.MethodDescriptor(
name='Compute',
full_name='simple.Inference.Compute',
index=0,
containing_service=None,
input_type=_INPUT,
output_type=_OUTPUT,
options=None,
),
_descriptor.MethodDescriptor(
name='BatchedCompute',
full_name='simple.Inference.BatchedCompute',
index=1,
containing_service=None,
input_type=_INPUT,
output_type=_OUTPUT,
options=None,
),
])
_sym_db.RegisterServiceDescriptor(_INFERENCE)
DESCRIPTOR.services_by_name['Inference'] = _INFERENCE
# @@protoc_insertion_point(module_scope)
================================================
FILE: examples/03_Batching/simple_pb2_grpc.py
================================================
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
import grpc
import simple_pb2 as simple__pb2
class InferenceStub(object):
# missing associated docume
SYMBOL INDEX (1107 symbols across 227 files)
FILE: examples/00_TensorRT/infer.cc
function ModelName (line 55) | static std::string ModelName(int model_id)
function ValidateEngine (line 62) | static bool ValidateEngine(const char* flagname, const std::string& value)
function main (line 79) | int main(int argc, char* argv[])
FILE: examples/00_TensorRT/inference.cc
function ModelName (line 53) | static std::string ModelName(int model_id)
class InferenceResources (line 60) | class InferenceResources : public InferenceManager
method InferenceResources (line 63) | InferenceResources(int max_executions, int max_buffers, size_t nCuda, ...
class Inference (line 80) | class Inference final
method Inference (line 83) | Inference(std::shared_ptr<InferenceResources> resources) : m_Resources...
method Run (line 85) | void Run(float seconds, bool warmup, int replicas, uint32_t requested_...
method GetResources (line 173) | inline std::shared_ptr<InferenceResources> GetResources() { return m_R...
function ValidateEngine (line 179) | static bool ValidateEngine(const char* flagname, const std::string& value)
function main (line 196) | int main(int argc, char* argv[])
FILE: examples/01_Basic_GRPC/src/async_client.cc
function main (line 54) | int main(int argc, char** argv)
FILE: examples/01_Basic_GRPC/src/client.cpp
class SimpleClient (line 45) | class SimpleClient {
method SimpleClient (line 47) | SimpleClient(std::shared_ptr<Channel> channel)
method Compute (line 52) | int Compute(const int batch_id) {
function main (line 83) | int main(int argc, char** argv) {
FILE: examples/01_Basic_GRPC/src/server.cpp
type SimpleResources (line 89) | struct SimpleResources : public Resources
method SimpleResources (line 91) | SimpleResources(int numThreadsInPool=3) : m_ThreadPool(numThreadsInPoo...
method ThreadPool (line 95) | ThreadPool& AcquireThreadPool()
class SimpleContext (line 108) | class SimpleContext final : public Context<simple::Input, simple::Output...
method ExecuteRPC (line 110) | void ExecuteRPC(RequestType &input, ResponseType &output) final override
function main (line 128) | int main(int argc, char *argv[])
FILE: examples/02_TensorRT_GRPC/src/async-client.cc
class GreeterClient (line 70) | class GreeterClient
method GreeterClient (line 73) | explicit GreeterClient(std::shared_ptr<Channel> channel) : stub_(Infer...
method SayHello (line 76) | void SayHello(const size_t batch_id, const int batch_size)
method AsyncCompleteRpc (line 103) | void AsyncCompleteRpc()
method Shutdown (line 131) | void Shutdown() { cq_.Shutdown(); }
type AsyncClientCall (line 135) | struct AsyncClientCall
function main (line 163) | int main(int argc, char** argv)
FILE: examples/02_TensorRT_GRPC/src/metrics.cc
type trtlab (line 32) | namespace trtlab {
function Metrics (line 54) | Metrics* Metrics::GetSingleton()
FILE: examples/02_TensorRT_GRPC/src/metrics.h
function namespace (line 35) | namespace trtlab {
FILE: examples/02_TensorRT_GRPC/src/server.cc
class FlowersResources (line 121) | class FlowersResources : public InferenceManager
method FlowersResources (line 124) | explicit FlowersResources(int max_executions, int max_buffers, int nCu...
method ThreadPool (line 131) | ThreadPool& GetCudaThreadPool() { return m_CudaThreadPool; }
method ThreadPool (line 132) | ThreadPool& GetResponseThreadPool() { return m_ResponseThreadPool; }
class FlowersContext (line 148) | class FlowersContext final : public Context<BatchInput, BatchPredictions...
method ExecuteRPC (line 150) | void ExecuteRPC(RequestType& input, ResponseType& output) final override
method WriteBatchPredictions (line 187) | void WriteBatchPredictions(RequestType& input, ResponseType& output, f...
function ValidateEngine (line 216) | static bool ValidateEngine(const char* flagname, const std::string& value)
function ValidateBytes (line 222) | static bool ValidateBytes(const char* flagname, const std::string& value)
function main (line 243) | int main(int argc, char* argv[])
FILE: examples/02_TensorRT_GRPC/src/siege.cc
class GreeterClient (line 74) | class GreeterClient
method GreeterClient (line 77) | explicit GreeterClient(std::shared_ptr<Channel> channel, int max_outst...
method SayHello (line 84) | void SayHello(const size_t batch_id, const int batch_size, char* bytes...
method AsyncCompleteRpc (line 134) | void AsyncCompleteRpc()
method Shutdown (line 182) | void Shutdown() { cq_.Shutdown(); }
type AsyncClientCall (line 186) | struct AsyncClientCall
function ValidateBytes (line 218) | static bool ValidateBytes(const char* flagname, const std::string& value)
function main (line 236) | int main(int argc, char** argv)
FILE: examples/02_TensorRT_GRPC/src/sync-client.cc
class SimpleClient (line 64) | class SimpleClient
method SimpleClient (line 67) | SimpleClient(std::shared_ptr<Channel> channel) : stub_(Inference::NewS...
method Compute (line 71) | int Compute(const int batch_id, const int batch_size)
function main (line 108) | int main(int argc, char** argv)
FILE: examples/03_Batching/inference-batcher.cc
type BatchingService (line 73) | struct BatchingService
type MessageType (line 77) | struct MessageType
class Client (line 88) | class Client
method Client (line 95) | Client(PrepareFunc prepare_func, std::shared_ptr<ThreadPool> thread_...
method WriteAndCloseStream (line 107) | void WriteAndCloseStream(uint32_t messages_count, MessageType* messa...
class Call (line 123) | class Call
method Call (line 126) | Call() : m_Started(false), m_NextState(&Call::StateInvalid) {}
method Push (line 129) | void Push(MessageType& message)
method Start (line 137) | void Start()
method RunNextState (line 146) | bool RunNextState(bool ok)
method Fail (line 155) | bool Fail()
method WriteNext (line 161) | void WriteNext()
method ReadNext (line 179) | void ReadNext()
method StateWriteDone (line 196) | bool StateWriteDone(bool ok)
method StateReadDone (line 204) | bool StateReadDone(bool ok)
method StateCloseStreamDone (line 221) | bool StateCloseStreamDone(bool ok)
method StateFinishedDone (line 229) | bool StateFinishedDone(bool ok)
method StateInvalid (line 240) | bool StateInvalid(bool ok) { LOG(FATAL) << "This should never be c...
method ProgressEngine (line 257) | void ProgressEngine(::grpc::CompletionQueue& cq)
class Resources (line 280) | class Resources : public ::trtlab::Resources
method Resources (line 283) | Resources(uint32_t max_batch_size, uint64_t timeout, std::shared_ptr...
method PreprocessRequest (line 288) | virtual void PreprocessRequest(Request* req) {}
method Push (line 290) | void Push(Request* req, Response* resp, Callback callback)
method ProgressEngine (line 298) | void ProgressEngine()
class ReceiveContext (line 347) | class ReceiveContext final : public ::nvrpc::Context<Request, Response...
method ExecuteRPC (line 349) | void ExecuteRPC(Request& request, Response& response) final override
function main (line 374) | int main(int argc, char* argv[])
FILE: examples/03_Batching/simple_batching_client.py
function run (line 7) | def run():
FILE: examples/03_Batching/simple_pb2_grpc.py
class InferenceStub (line 7) | class InferenceStub(object):
method __init__ (line 11) | def __init__(self, channel):
class InferenceServicer (line 29) | class InferenceServicer(object):
method Compute (line 33) | def Compute(self, request, context):
method BatchedCompute (line 40) | def BatchedCompute(self, request_iterator, context):
function add_InferenceServicer_to_server (line 48) | def add_InferenceServicer_to_server(servicer, server):
FILE: examples/03_Batching/streaming-service.cc
class SimpleContext (line 51) | class SimpleContext final : public BatchingContext<simple::Input, simple...
method ExecuteRPC (line 53) | void ExecuteRPC(std::vector<RequestType>& inputs,
method OnRequestReceived (line 65) | void OnRequestReceived(const RequestType& request) final override
function main (line 71) | int main(int argc, char* argv[])
FILE: examples/03_Batching/unary_client.py
function run (line 7) | def run():
FILE: examples/04_Middleman/middleman-client.cc
type MiddlemanService (line 86) | struct MiddlemanService
type MessageType (line 90) | struct MessageType
class Client (line 101) | class Client
method Client (line 108) | Client(PrepareFunc prepare_func, std::shared_ptr<ThreadPool> thread_...
method WriteAndCloseStream (line 120) | void WriteAndCloseStream(uint32_t messages_count, MessageType* messa...
class Call (line 138) | class Call
method Call (line 141) | Call() : m_NextState(&Call::StateFinishedDone) {}
method Push (line 144) | void Push(MessageType& message)
method RunNextState (line 152) | bool RunNextState(bool ok)
method Fail (line 161) | bool Fail()
method StateFinishedDone (line 167) | bool StateFinishedDone(bool ok)
method ProgressEngine (line 192) | void ProgressEngine(::grpc::CompletionQueue& cq)
class Resources (line 216) | class Resources : public ::trtlab::Resources
method Resources (line 219) | Resources(uint32_t max_batch_size, uint64_t timeout, std::shared_ptr...
method PreprocessRequest (line 224) | virtual void PreprocessRequest(Request* req) {}
method Push (line 226) | void Push(Request* req, Response* resp, Callback callback)
method ProgressEngine (line 234) | void ProgressEngine()
class ReceiveContext (line 274) | class ReceiveContext final : public ::nvrpc::Context<Request, Response...
method ExecuteRPC (line 276) | void ExecuteRPC(Request& request, Response& response) final override
class DemoMiddlemanService (line 304) | class DemoMiddlemanService : public InferMiddlemanService
class Resources (line 307) | class Resources : public InferMiddlemanService::Resources
method PreprocessRequest (line 311) | void PreprocessRequest(easter::InferRequest* req) override
function main (line 322) | int main(int argc, char* argv[])
FILE: examples/10_Internals/internals.cc
function main (line 57) | int main(int argc, char* argv[])
FILE: examples/12_ConfigGenerator/generator.cc
function DataTypeToBytes (line 43) | static size_t DataTypeToBytes(nvidia::inferenceserver::DataType dataType)
function ConvertTensorRTDataType (line 72) | static nvidia::inferenceserver::DataType ConvertTensorRTDataType(nvinfer...
function tensorrt_engine (line 89) | std::string tensorrt_engine(std::string model_name, std::string engine, ...
function PYBIND11_MODULE (line 131) | PYBIND11_MODULE(config_generator, m)
FILE: examples/12_FlatBuffers/client.cc
class SimpleClient (line 46) | class SimpleClient
method SimpleClient (line 49) | SimpleClient(std::shared_ptr<Channel> channel) : stub_(Greeter::NewStu...
method Compute (line 53) | std::string Compute(const int batch_id)
function main (line 92) | int main(int argc, char** argv)
FILE: examples/12_FlatBuffers/example.grpc.fb.h
function namespace (line 20) | namespace grpc {
function class (line 27) | class Greeter final
function class (line 187) | class Service : public ::grpc::Service
function RequestSayHello (line 217) | void RequestSayHello(
function RequestSayManyHellos (line 245) | void RequestSayManyHellos(
type WithAsyncMethod_SayHello (line 255) | typedef WithAsyncMethod_SayHello<WithAsyncMethod_SayManyHellos<Service>>...
type WithStreamedUnaryMethod_SayHello (line 325) | typedef WithStreamedUnaryMethod_SayHello<Service> StreamedUnaryService;
type WithSplitStreamingMethod_SayManyHellos (line 362) | typedef WithSplitStreamingMethod_SayManyHellos<Service> SplitStreamedSer...
type WithStreamedUnaryMethod_SayHello (line 363) | typedef WithStreamedUnaryMethod_SayHello<WithSplitStreamingMethod_SayMan...
FILE: examples/12_FlatBuffers/example_generated.h
type HelloReply (line 8) | struct HelloReply
type HelloRequest (line 10) | struct HelloRequest
type ManyHellosRequest (line 12) | struct ManyHellosRequest
function FLATBUFFERS_FINAL_CLASS (line 14) | struct HelloReply FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
function HelloReplyBuilder (line 31) | struct HelloReplyBuilder
function FLATBUFFERS_FINAL_CLASS (line 67) | struct HelloRequest FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
function HelloRequestBuilder (line 84) | struct HelloRequestBuilder
function FLATBUFFERS_FINAL_CLASS (line 120) | struct ManyHellosRequest FLATBUFFERS_FINAL_CLASS : private flatbuffers::...
function add_num_greetings (line 140) | struct ManyHellosRequestBuilder
function explicit (line 152) | explicit ManyHellosRequestBuilder(flatbuffers::FlatBufferBuilder& _fbb) ...
FILE: examples/12_FlatBuffers/server.cc
type SimpleResources (line 50) | struct SimpleResources : public Resources
class SimpleContext (line 54) | class SimpleContext final : public Context<Request, Response, SimpleReso...
method ExecuteRPC (line 56) | void ExecuteRPC(Request& input, Response& output) final override
function main (line 83) | int main(int argc, char* argv[])
FILE: examples/30_PyTensorRT/client.py
function main (line 11) | def main():
FILE: examples/30_PyTensorRT/compute.py
function main (line 12) | def main():
FILE: examples/30_PyTensorRT/infer_test_utils.py
function load_inputs (line 10) | def load_inputs(test_data_dir):
function load_outputs (line 22) | def load_outputs(test_data_dir):
function mnist_image (line 34) | def mnist_image(data):
function softmax (line 39) | def softmax(x):
FILE: examples/30_PyTensorRT/server.py
function main (line 11) | def main():
FILE: examples/98_MultiProcessSingleStream/setup.py
function main (line 43) | def main():
FILE: examples/99_LoadBalancer/run_loadbalancer.py
function render (line 37) | def render(template_path, data=None, extensions=None, strict=False):
function main (line 60) | def main(n, template):
FILE: examples/Deployment/ImageClient/client.cc
function PYBIND11_MODULE (line 114) | PYBIND11_MODULE(deploy_image_client, m)
FILE: examples/Deployment/ImageClient/client.h
function class (line 37) | class ClassifyResult
type DetectionResult (line 47) | struct DetectionResult
function class (line 57) | class ImageClient
FILE: examples/Deployment/ImageClient/client.py
class ImageClient (line 32) | class ImageClient:
method __init__ (line 34) | def __init__(self, *, hostname = "trt.lab"):
method classify (line 38) | def classify(self, image_path, model):
method object_detection (line 42) | def object_detection(self, image_path, model):
method _get_s3_client (line 46) | def _get_s3_client(self):
method _check_if_file (line 56) | def _check_if_file(self, file_path):
method _upload_to_s3 (line 60) | def _upload_to_s3(self, image_path):
FILE: examples/Deployment/RouteRequests/test_client.py
function main (line 6) | def main():
FILE: examples/Deployment/RouteRequests/test_service.cc
class TestResources (line 59) | class TestResources : public Resources
method TestResources (line 62) | TestResources(const std::string& hostname) : m_Hostname(hostname) {}
class TestContext (line 70) | class TestContext final : public Context<ImageInfo, Output, TestResources>
method ExecuteRPC (line 72) | void ExecuteRPC(ImageInfo& input, Output& output) final override
function main (line 80) | int main(int argc, char* argv[])
FILE: examples/Deployment/batcher.cc
type MessageType (line 12) | struct MessageType
function Resources (line 22) | Resources(PrepareFn prepare_fn, std::shared_ptr<client::Executor> executor,
function CreateClient (line 30) | std::shared_ptr<ClientStreaming<Request, Response>>
function Enqueue (line 39) | void Enqueue(Request& req, Response& resp, Callback callback)
class BatchingContext (line 135) | class BatchingContext : public Context<Request, Response, Resources>
method ExecuteRPC (line 137) | void ExecuteRPC(Request& request, Response& response) final override
FILE: examples/ONNX/resnet50/build.py
function main (line 47) | def main(models, batch, precision):
FILE: examples/ONNX/resnet50/calibrator.py
class ONNXEntropyCalibrator (line 61) | class ONNXEntropyCalibrator(trt.IInt8EntropyCalibrator):
method __init__ (line 62) | def __init__(self, image_dir, batch_size, calibration_batches, cache_f...
method transform_image (line 96) | def transform_image(self, img):
method read_image_batch (line 108) | def read_image_batch(self):
method get_batch_size (line 120) | def get_batch_size(self):
method get_batch (line 126) | def get_batch(self, names):
method read_calibration_cache (line 138) | def read_calibration_cache(self):
method write_calibration_cache (line 144) | def write_calibration_cache(self, cache):
FILE: examples/ONNX/resnet50/int8.py
function build_int8_engine_onnx (line 5) | def build_int8_engine_onnx(model_file, image_dir, batch_size, calibratio...
FILE: examples/ONNX/resnet50/onnx_utils.py
function load_inputs (line 10) | def load_inputs(test_data_dir):
function load_outputs (line 22) | def load_outputs(test_data_dir):
function mnist_image (line 34) | def mnist_image(data):
function softmax (line 39) | def softmax(x):
FILE: examples/ONNX/resnet50/run_jpeg_test.py
function tensorrt_init (line 21) | def tensorrt_init(engines):
function infer_image (line 30) | def infer_image(runner, image):
function preprocess_image (line 41) | def preprocess_image(runner, image_path):
function transform_image (line 49) | def transform_image(img):
function validate_results (line 61) | def validate_results(computed, expected):
function main (line 74) | def main(engine, image):
FILE: examples/ONNX/resnet50/run_onnx_tests.py
function tensorrt_init (line 13) | def tensorrt_init(engines):
function test_data (line 22) | def test_data(test_path):
function run_test (line 32) | def run_test(runner, inputs, outputs):
function preprocess_inputs (line 38) | def preprocess_inputs(runner, inputs):
function validate_results (line 52) | def validate_results(computed, expected):
function main (line 65) | def main(engine, tests):
FILE: examples/nvRPC/SharedMemoryService/client.cc
class SimpleClient (line 56) | class SimpleClient final
method SimpleClient (line 59) | SimpleClient(std::shared_ptr<Channel> channel)
method Compute (line 65) | int Compute(const int batch_id)
method RandomAllocation (line 107) | CyclicAllocator<SystemV>::Descriptor RandomAllocation()
function main (line 119) | int main(int argc, char** argv)
FILE: examples/nvRPC/SharedMemoryService/server.cc
class ExternalSharedMemoryManager (line 68) | class ExternalSharedMemoryManager final
class PartialSegmentDescriptor (line 70) | class PartialSegmentDescriptor final : public Descriptor<SystemV>
method PartialSegmentDescriptor (line 73) | PartialSegmentDescriptor(const std::shared_ptr<SystemV>& segment, si...
method PartialSegmentDescriptor (line 80) | PartialSegmentDescriptor(PartialSegmentDescriptor&& other)
method PartialSegmentDescriptor (line 86) | PartialSegmentDescriptor& operator=(PartialSegmentDescriptor&&) = de...
method ExternalSharedMemoryManager (line 96) | ExternalSharedMemoryManager() = default;
method Descriptor (line 99) | Descriptor Acquire(size_t shm_id, size_t offset, size_t size)
method Release (line 106) | void Release(size_t shm_id)
method GetOrAttachToShmID (line 114) | std::shared_ptr<SystemV> GetOrAttachToShmID(size_t shm_id)
type SimpleResources (line 137) | struct SimpleResources : public Resources
method SimpleResources (line 139) | SimpleResources() = default;
method ExternalSharedMemoryManager (line 141) | ExternalSharedMemoryManager& GetExternalSharedMemoryManager()
class SimpleContext (line 150) | class SimpleContext final : public Context<simple::Input, simple::Output...
method ExecuteRPC (line 152) | void ExecuteRPC(RequestType& input, ResponseType& output) final override
function main (line 171) | int main(int argc, char* argv[])
FILE: examples/nvRPC/StreamingInOrderSendRecv/client.cc
function main (line 56) | int main(int argc, char** argv)
FILE: examples/nvRPC/StreamingInOrderSendRecv/server.cc
type SimpleResources (line 63) | struct SimpleResources : public Resources
method SimpleResources (line 65) | SimpleResources(int numThreadsInPool = 3) : m_ThreadPool(numThreadsInP...
method ThreadPool (line 70) | ThreadPool& AcquireThreadPool() { return m_ThreadPool; }
class SimpleContext (line 80) | class SimpleContext final
method ExecuteRPC (line 83) | void ExecuteRPC(RequestType& input, ResponseType& output) final override
function main (line 103) | int main(int argc, char* argv[])
FILE: examples/nvRPC/StreamingService/client.cc
function ValidateEven (line 52) | static bool ValidateEven(const char* flagname, int value)
function main (line 63) | int main(int argc, char** argv)
FILE: examples/nvRPC/StreamingService/even-odds.cc
type SimpleResources (line 30) | struct SimpleResources : public Resources
method SimpleResources (line 32) | SimpleResources(int numThreadsInPool = 3) : m_ThreadPool(numThreadsInP...
method ThreadPool (line 33) | ThreadPool& AcquireThreadPool() { return m_ThreadPool; }
class SimpleContext (line 39) | class SimpleContext final : public StreamingContext<simple::Input, simpl...
method RequestReceived (line 41) | void RequestReceived(RequestType&& input, std::shared_ptr<ServerStream...
function main (line 69) | int main(int argc, char* argv[])
FILE: examples/nvRPC/StreamingService/ping-pong.cc
type SimpleResources (line 30) | struct SimpleResources : public Resources
method SimpleResources (line 32) | SimpleResources(int numThreadsInPool = 3) : m_ThreadPool(numThreadsInP...
method ThreadPool (line 33) | ThreadPool& AcquireThreadPool() { return m_ThreadPool; }
class SimpleContext (line 39) | class SimpleContext final : public StreamingContext<simple::Input, simpl...
method RequestReceived (line 41) | void RequestReceived(RequestType&& input, std::shared_ptr<ServerStream...
function main (line 57) | int main(int argc, char* argv[])
FILE: examples/nvRPC/UnaryService/client.cc
function main (line 55) | int main(int argc, char** argv)
FILE: examples/nvRPC/UnaryService/server.cc
type SimpleResources (line 91) | struct SimpleResources : public Resources
method SimpleResources (line 93) | SimpleResources(int numThreadsInPool = 3) : m_ThreadPool(numThreadsInP...
method ThreadPool (line 98) | ThreadPool& AcquireThreadPool() { return m_ThreadPool; }
class SimpleContext (line 108) | class SimpleContext final : public Context<simple::Input, simple::Output...
method ExecuteRPC (line 110) | void ExecuteRPC(RequestType& input, ResponseType& output) final override
function main (line 128) | int main(int argc, char* argv[])
FILE: models/onnx/common.py
function GiB (line 13) | def GiB(val):
function find_sample_data (line 16) | def find_sample_data(description="Runs a TensorRT Python sample", subfol...
class HostDeviceMem (line 50) | class HostDeviceMem(object):
method __init__ (line 51) | def __init__(self, host_mem, device_mem):
method __str__ (line 55) | def __str__(self):
method __repr__ (line 58) | def __repr__(self):
function allocate_buffers (line 62) | def allocate_buffers(engine):
function do_inference (line 84) | def do_inference(context, bindings, inputs, outputs, stream):
FILE: models/onnx/onnx_builder.py
function softmax (line 17) | def softmax(X, theta = 1.0, axis = None):
class ModelData (line 60) | class ModelData(object):
function allocate_buffers (line 70) | def allocate_buffers(engine):
function do_inference (line 81) | def do_inference(context, h_input, d_input, h_output, d_output, stream):
function build_engine_onnx (line 92) | def build_engine_onnx(model_file, calibrator=None):
function normalize_image (line 113) | def normalize_image(image_name):
function load_normalized_test_case (line 121) | def load_normalized_test_case(test_image, pagelocked_buffer):
function create_calibration_dataset (line 126) | def create_calibration_dataset():
class ImageBatchStream (line 135) | class ImageBatchStream:
method __init__ (line 136) | def __init__(self, batch_size, calibration_files):
method reset (line 145) | def reset(self):
method next_batch (line 148) | def next_batch(self):
class MyEntropyCalibrator (line 166) | class MyEntropyCalibrator(trt.IInt8EntropyCalibrator):
method __init__ (line 167) | def __init__(self, stream):
method get_batch_size (line 173) | def get_batch_size(self):
method get_batch (line 176) | def get_batch(self, bindings, names):
method read_calibration_cache (line 184) | def read_calibration_cache(self, length):
method write_calibration_cache (line 187) | def write_calibration_cache(self, ptr, size):
function main (line 195) | def main():
function old_main (line 217) | def old_main():
FILE: models/setup.py
function main (line 43) | def main():
FILE: trtlab/core/benchmarks/bench_batcher.cc
function batcher_standard_batcher_int (line 36) | static void batcher_standard_batcher_int(benchmark::State& state)
type audio_state (line 55) | struct audio_state
function batcher_standard_batcher_audio (line 62) | static void batcher_standard_batcher_audio(benchmark::State& state)
function batcher_engine (line 81) | static void batcher_engine(benchmark::State& state)
FILE: trtlab/core/benchmarks/bench_memory.cc
function BM_Memory_SystemMalloc (line 37) | static void BM_Memory_SystemMalloc(benchmark::State& state)
function BM_Memory_SystemV_descriptor (line 49) | static void BM_Memory_SystemV_descriptor(benchmark::State& state)
FILE: trtlab/core/benchmarks/bench_memory_stack.cc
function allocators_transactional_raw (line 38) | static void allocators_transactional_raw(benchmark::State& state)
function allocators_transactional_std (line 57) | static void allocators_transactional_std(benchmark::State& state)
function allocators_transactional_md (line 78) | static void allocators_transactional_md(benchmark::State& state)
function make_vector (line 103) | auto make_vector(RawAllocator& alloc)
function BM_vector_transactional (line 110) | static void BM_vector_transactional(benchmark::State& state)
function BM_vector_smart_transactional (line 126) | static void BM_vector_smart_transactional(benchmark::State& state)
function BM_CyclicAllocator_stl_allocator (line 145) | static void BM_CyclicAllocator_stl_allocator(benchmark::State& state)
function BM_CyclicAllocator_stl_allocator2 (line 157) | static void BM_CyclicAllocator_stl_allocator2(benchmark::State& state)
function BM_vector_default (line 167) | static void BM_vector_default(benchmark::State& state)
function BM_stl_allocator_ctor (line 177) | static void BM_stl_allocator_ctor(benchmark::State& state)
function BM_stl_allocator_allocate_lifecycle (line 185) | static void BM_stl_allocator_allocate_lifecycle(benchmark::State& state)
FILE: trtlab/core/benchmarks/bench_pool.cc
function BM_Pool_v1_Pop (line 31) | static void BM_Pool_v1_Pop(benchmark::State& state)
function BM_Pool_v2_Pop (line 46) | static void BM_Pool_v2_Pop(benchmark::State& state)
function BM_Pool_v3_Pop (line 61) | static void BM_Pool_v3_Pop(benchmark::State& state)
function BM_Pool_v4_Pop (line 76) | static void BM_Pool_v4_Pop(benchmark::State& state)
function BM_Pool_v4_Pop_Shared (line 91) | static void BM_Pool_v4_Pop_Shared(benchmark::State& state)
function BM_Pool_v3_Pop_Userspace (line 106) | static void BM_Pool_v3_Pop_Userspace(benchmark::State& state)
FILE: trtlab/core/benchmarks/bench_thread_pool.cc
function BM_ThreadPool_Enqueue (line 32) | static void BM_ThreadPool_Enqueue(benchmark::State& state)
function BM_HybridThreadPool_Enqueue (line 47) | static void BM_HybridThreadPool_Enqueue(benchmark::State& state)
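
The bench_*.cc entries in this tree follow the Google Benchmark pattern: a static function taking benchmark::State, a timed for (auto _ : state) loop, and a BENCHMARK(...) registration. A minimal sketch of an enqueue-style microbenchmark, using std::async as a stand-in for the repo's ThreadPool::enqueue:

    #include <benchmark/benchmark.h>
    #include <future>

    // Stand-in workload for whatever the thread pool would execute per item.
    static int small_task() { return 42; }

    static void BM_Async_Enqueue(benchmark::State& state)
    {
        for (auto _ : state)  // only the body of this loop is timed
        {
            // std::async stands in for thread_pool.enqueue(...); the result is
            // consumed through DoNotOptimize so the compiler cannot elide it.
            auto result = std::async(std::launch::async, small_task);
            benchmark::DoNotOptimize(result.get());
        }
    }
    BENCHMARK(BM_Async_Enqueue);

    BENCHMARK_MAIN();

Each registered function becomes one row in the benchmark binary's report, which is how the x1/x10 variants above are compared.
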
FILE: trtlab/core/include/trtlab/core/affinity.h
function namespace (line 32) | namespace trtlab
function class (line 61) | class affinity_guard final
type numa_node (line 79) | struct numa_node
function final (line 90) | struct affinity final
FILE: trtlab/core/include/trtlab/core/async_compute.h
function namespace (line 32) | namespace trtlab
FILE: trtlab/core/include/trtlab/core/batcher.h
function namespace (line 12) | namespace trtlab
FILE: trtlab/core/include/trtlab/core/cyclic_buffer.h
function namespace (line 38) | namespace trtlab
FILE: trtlab/core/include/trtlab/core/cyclic_windowed_buffer.h
function namespace (line 39) | namespace trtlab
function replicate (line 280) | void replicate(void* dst, const void* src, std::size_t size) final override
type reservation (line 304) | struct reservation
function reservation (line 306) | const reservation reserve_window()
function reset (line 325) | void reset()
type reservation (line 334) | struct reservation
function reset (line 412) | void reset()
FILE: trtlab/core/include/trtlab/core/dispatcher.h
function namespace (line 24) | namespace trtlab
function future_type (line 79) | future_type enqueue(T item)
function shutdown (line 106) | void shutdown()
function QueueProgressTask (line 131) | void QueueProgressTask()
function ProgressTask (line 140) | void ProgressTask(std::size_t id)
function future_type (line 226) | future_type enqueue(T item)
function shutdown (line 256) | void shutdown()
function QueueProgressTask (line 285) | void QueueProgressTask()
function ProgressTask (line 296) | void ProgressTask(std::size_t id)
FILE: trtlab/core/include/trtlab/core/fiber_group.h
function namespace (line 7) | namespace trtlab
FILE: trtlab/core/include/trtlab/core/hybrid_condition.h
function notify_one (line 111) | void notify_one() noexcept
function notify_all (line 127) | void notify_all() noexcept
type timespec (line 175) | struct timespec
type timespec (line 176) | struct timespec
FILE: trtlab/core/include/trtlab/core/hybrid_mutex.h
function sys_futex (line 43) | inline int sys_futex(void* addr1, int op, int val1, const struct timespe...
function lock (line 74) | void lock() noexcept
function try_lock (line 102) | bool try_lock() noexcept
function unlock (line 116) | void unlock() noexcept
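
hybrid_mutex.h builds lock/try_lock/unlock on the raw futex(2) syscall exposed through sys_futex. Below is a minimal futex-backed mutex in the style of Drepper's "Futexes Are Tricky" (Linux-only, seq_cst atomics for simplicity); the repo's "hybrid" variant additionally spins before sleeping, which is not shown here.

    #include <atomic>
    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    #include <thread>

    static long sys_futex(void* addr, int op, int val)
    {
        // Only the first three futex arguments are needed for WAIT/WAKE here.
        return syscall(SYS_futex, addr, op, val, nullptr, nullptr, 0);
    }

    class futex_mutex
    {
        // 0 = unlocked, 1 = locked, 2 = locked with (possible) waiters
        std::atomic<int> m_state{0};

    public:
        void lock() noexcept
        {
            int c = 0;
            if (m_state.compare_exchange_strong(c, 1))
                return;                              // fast path: was unlocked
            if (c != 2)
                c = m_state.exchange(2);             // mark the lock as contended
            while (c != 0)
            {
                sys_futex(&m_state, FUTEX_WAIT_PRIVATE, 2);  // sleep while value == 2
                c = m_state.exchange(2);             // woken: retry, keep contended mark
            }
        }

        bool try_lock() noexcept
        {
            int c = 0;
            return m_state.compare_exchange_strong(c, 1);
        }

        void unlock() noexcept
        {
            if (m_state.exchange(0) == 2)
                sys_futex(&m_state, FUTEX_WAKE_PRIVATE, 1);  // wake one sleeper
        }
    };

    static futex_mutex g_mutex;
    static int g_counter = 0;

    int main()
    {
        auto worker = [] {
            for (int i = 0; i < 100000; ++i)
            {
                g_mutex.lock();
                ++g_counter;
                g_mutex.unlock();
            }
        };
        std::thread a(worker), b(worker);
        a.join();
        b.join();
        return g_counter == 200000 ? 0 : 1;  // 0 if the mutex excluded correctly
    }
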
FILE: trtlab/core/include/trtlab/core/memory/first_touch_allocator.h
function namespace (line 14) | namespace trtlab
FILE: trtlab/core/include/trtlab/core/pool.h
function namespace (line 40) | namespace trtlab
function namespace (line 145) | namespace v1
function namespace (line 247) | namespace v2
function namespace (line 327) | namespace v3
type key (line 459) | struct key
function Pool (line 472) | Pool(key) {}
function virtual (line 473) | virtual ~Pool() {}
function pool_t (line 478) | static pool_t Create()
function Push (line 483) | void Push(ResourceType&& resource)
function push (line 488) | void push(ResourceType&& resource)
function SharedItem (line 510) | SharedItem Pop()
function SharedItem (line 515) | SharedItem PopWithoutReturn()
function UniqueItem (line 520) | UniqueItem pop_unique(on_return_fn on_return)
function UniqueItem (line 525) | UniqueItem pop_unique()
function UniqueItem (line 530) | UniqueItem pop_unique_without_return()
function SharedItem (line 535) | SharedItem pop_shared(on_return_fn on_return)
function SharedItem (line 544) | SharedItem pop_shared()
function SharedItem (line 549) | SharedItem pop_shared_without_return()
function item_t (line 576) | item_t internal_pop()
function class (line 585) | class UniqueItem
function ResourceType (line 621) | const ResourceType* operator->() const
type key (line 643) | struct key
function UniquePool (line 656) | UniquePool(key) {}
function virtual (line 657) | virtual ~UniquePool() {}
function pool_t (line 662) | static pool_t Create()
function push (line 667) | void push(item_t item)
function UniqueItem (line 678) | UniqueItem pop_unique(on_return_fn on_return)
function UniqueItem (line 683) | UniqueItem pop_unique()
function UniqueItem (line 688) | UniqueItem pop_unique_without_return()
function item_t (line 709) | item_t internal_pop()
function ResourceType (line 755) | const ResourceType* operator->() const
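
pool.h carries several pool generations (v1-v3 plus the current Pool/UniquePool); the common idea is that Pop/pop_shared hand out items whose release returns the resource to the pool. A minimal sketch of that return-to-pool trick using a shared_ptr deleter; Pool, Push and Pop echo the names above, but the class is illustrative rather than the library's implementation (the real pool avoids the per-Pop heap allocation and can block when empty).

    #include <cassert>
    #include <memory>
    #include <mutex>
    #include <queue>
    #include <string>

    template<typename ResourceType>
    class Pool : public std::enable_shared_from_this<Pool<ResourceType>>
    {
    public:
        static std::shared_ptr<Pool> Create() { return std::shared_ptr<Pool>(new Pool); }

        void Push(ResourceType&& resource)
        {
            std::lock_guard<std::mutex> lock(m_Mutex);
            m_Queue.push(std::move(resource));
        }

        // Pop() hands out a shared_ptr whose deleter moves the resource back
        // into the pool instead of destroying it.
        std::shared_ptr<ResourceType> Pop()
        {
            auto self = this->shared_from_this();
            std::lock_guard<std::mutex> lock(m_Mutex);
            assert(!m_Queue.empty());  // the real pool would block or fail gracefully
            auto* raw = new ResourceType(std::move(m_Queue.front()));
            m_Queue.pop();
            return std::shared_ptr<ResourceType>(raw, [self](ResourceType* ptr) {
                self->Push(std::move(*ptr));  // return the resource, then free the holder
                delete ptr;
            });
        }

    private:
        Pool() = default;
        std::mutex m_Mutex;
        std::queue<ResourceType> m_Queue;
    };

    int main()
    {
        auto pool = Pool<std::string>::Create();
        pool->Push("buffer-0");
        {
            auto item = pool->Pop();  // pool is now empty
            *item += " (in use)";
        }                             // item released -> resource pushed back
        auto again = pool->Pop();     // same buffer comes back around
        assert(*again == "buffer-0 (in use)");
        return 0;
    }
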
FILE: trtlab/core/include/trtlab/core/ranges.h
function namespace (line 7) | namespace trtlab
FILE: trtlab/core/include/trtlab/core/resources.h
function namespace (line 31) | namespace trtlab {
FILE: trtlab/core/include/trtlab/core/standard_threads.h
function namespace (line 4) | namespace trtlab
FILE: trtlab/core/include/trtlab/core/task_pool.h
function namespace (line 34) | namespace trtlab
FILE: trtlab/core/include/trtlab/core/thread_pool.h
function namespace (line 71) | namespace trtlab
FILE: trtlab/core/include/trtlab/core/types.h
function namespace (line 36) | namespace trtlab {
FILE: trtlab/core/include/trtlab/core/userspace_threads.h
function namespace (line 4) | namespace trtlab
FILE: trtlab/core/include/trtlab/core/utils.h
function namespace (line 39) | namespace trtlab {
FILE: trtlab/core/src/affinity.cc
function cpu_set (line 45) | cpu_set cpu_set::get_intersection(const cpu_set& other) const
function cpu_set (line 52) | cpu_set cpu_set::get_union(const cpu_set& other) const
function cpu_set (line 59) | cpu_set cpu_set::get_difference(const cpu_set& other) const
function cpu_set (line 66) | cpu_set cpu_set::from_string(std::string ids)
function cpu_set (line 147) | cpu_set affinity::this_thread::get_affinity()
function ConvertString2Int (line 209) | int ConvertString2Int(const std::string& str)
function SplitStringToArray (line 217) | std::vector<std::string> SplitStringToArray(const std::string& str, char...
function parse_ints (line 229) | std::vector<int> parse_ints(const std::string data)
function cpu_int_string (line 258) | std::string cpu_int_string(const cpu_set& cpus, std::function<int(const ...
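
affinity.cc converts strings such as "0-3,8" into cpu_set objects and applies them to threads. A minimal sketch of those two steps with the glibc CPU_* macros and pthread_setaffinity_np (Linux-only); parse_ints below is a simplified stand-in for the repo's parser.

    #ifndef _GNU_SOURCE
    #define _GNU_SOURCE
    #endif
    #include <pthread.h>
    #include <sched.h>

    #include <iostream>
    #include <sstream>
    #include <string>
    #include <vector>

    // Parse "0-3,8" into {0,1,2,3,8}; simplified stand-in for cpu_set::from_string.
    static std::vector<int> parse_ints(const std::string& data)
    {
        std::vector<int> ids;
        std::stringstream ss(data);
        std::string token;
        while (std::getline(ss, token, ','))
        {
            auto dash = token.find('-');
            if (dash == std::string::npos)
            {
                ids.push_back(std::stoi(token));
            }
            else
            {
                int lo = std::stoi(token.substr(0, dash));
                int hi = std::stoi(token.substr(dash + 1));
                for (int i = lo; i <= hi; ++i)
                    ids.push_back(i);
            }
        }
        return ids;
    }

    int main()
    {
        cpu_set_t cpus;
        CPU_ZERO(&cpus);
        for (int id : parse_ints("0-1"))
            CPU_SET(id, &cpus);

        // Pin the calling thread to the requested logical CPUs.
        int rc = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpus);
        std::cout << "pinned to " << CPU_COUNT(&cpus) << " cpus, rc=" << rc << std::endl;
        return rc;
    }
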
FILE: trtlab/core/src/cyclic_windowed_buffer.cc
function cyclic_windowed_buffer (line 38) | cyclic_windowed_buffer& cyclic_windowed_buffer::operator=(cyclic_windowe...
function cyclic_windowed_stack_impl (line 93) | cyclic_windowed_stack_impl& cyclic_windowed_stack_impl::operator=(cyclic...
FILE: trtlab/core/src/memory/copy.cc
type trtlab (line 32) | namespace trtlab {
function Copy (line 34) | void Copy(HostMemory& dst, size_t dst_offset, const HostMemory& src, s...
FILE: trtlab/core/src/memory/host_memory.cc
type trtlab (line 33) | namespace trtlab {
function DLContext (line 39) | DLContext HostMemory::DeviceContext()
FILE: trtlab/core/src/memory/malloc.cc
type trtlab (line 35) | namespace trtlab {
type foonathan (line 61) | namespace foonathan { namespace memory {
type memory (line 61) | namespace memory {
class memory::detail::lowlevel_allocator<detail::MallocAllocatorImpl> (line 68) | class memory::detail::lowlevel_allocator<detail::MallocAllocatorImpl>
class memory::allocator_traits<MallocAllocator> (line 69) | class memory::allocator_traits<MallocAllocator>
FILE: trtlab/core/src/memory/memory.cc
type trtlab (line 33) | namespace trtlab {
function mem_size_t (line 225) | mem_size_t CoreMemory::SizeOfDataType() const
function mem_size_t (line 235) | mem_size_t CoreMemory::SizeFromShape(const std::vector<mem_size_t>& sh...
FILE: trtlab/core/src/memory/sysv_allocator.cc
type foonathan (line 41) | namespace foonathan {
type memory (line 42) | namespace memory {
type trtlab (line 43) | namespace trtlab {
type sysv_detail (line 44) | namespace sysv_detail {
type sysv_allocation (line 46) | struct sysv_allocation final : memory_block
method sysv_allocation (line 48) | sysv_allocation() : memory_block(), shm_id(-1), release(true) {}
method sysv_allocation (line 49) | sysv_allocation(int id, void* mem, size_t size, bool rel = true)
method sysv_allocation (line 52) | sysv_allocation(const sysv_allocation&) = default;
method sysv_allocation (line 53) | sysv_allocation& operator=(const sysv_allocation&) = default;
method sysv_allocation (line 55) | sysv_allocation(sysv_allocation&& other)
method sysv_allocation (line 60) | sysv_allocation& operator=(sysv_allocation&& other)
class sysv_manager (line 72) | class sysv_manager final
method sysv_allocation (line 89) | static const sysv_allocation& allocate(std::size_t size)
method sysv_allocation (line 97) | static const sysv_allocation& attach(int shm_id)
method detach (line 102) | static int detach(void* addr)
method release (line 118) | static void release(int shm_id)
method size (line 135) | static std::size_t size(int shm_id)
method release_on_deallocate (line 141) | static bool release_on_deallocate()
method release_on_deallocate (line 148) | static bool release_on_deallocate(bool val)
method sysv_info (line 156) | static sysv_info sysv_info_for_pointer(void* ptr)
method sysv_manager (line 172) | sysv_manager() : m_release_on_deallocate(true) {}
method sysv_manager (line 174) | sysv_manager(const sysv_manager&) = delete;
method sysv_manager (line 175) | sysv_manager& operator=(const sysv_manager&) = delete;
method sysv_manager (line 177) | sysv_manager(sysv_manager&&) = delete;
method sysv_manager (line 178) | sysv_manager& operator=(sysv_manager&&) = delete;
method sysv_manager (line 180) | static sysv_manager& global_manager()
method sysv_allocation (line 186) | static const sysv_allocation& attach_impl(int shm_id, bool has...
method get_stats (line 197) | static struct shmid_ds get_stats(int shm_id)
method sysv_allocation (line 206) | const sysv_allocation& register_allocation(int shm_id, void* a...
method drop_allocation (line 216) | int drop_allocation(void* addr)
function sysv_info (line 266) | sysv_info sysv_allocator::sysv_info_for_pointer(void* ptr)
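
sysv_allocator.cc wraps System V shared memory behind a foonathan-memory block allocator, with a global sysv_manager mapping shm_ids to attached addresses. A minimal sketch of the underlying syscall sequence (shmget / shmat / shmctl / shmdt) on Linux, with error handling reduced to asserts:

    #include <cassert>
    #include <cstddef>
    #include <cstring>
    #include <iostream>
    #include <sys/ipc.h>
    #include <sys/shm.h>

    int main()
    {
        const std::size_t size = 1 << 20;  // one 1 MiB segment

        // Create a new private segment; IPC_PRIVATE yields a fresh id each call.
        int shm_id = shmget(IPC_PRIVATE, size, IPC_CREAT | 0600);
        assert(shm_id != -1);

        // Attach it into this process's address space and touch the memory.
        void* addr = shmat(shm_id, nullptr, 0);
        assert(addr != reinterpret_cast<void*>(-1));
        std::memset(addr, 0, size);

        // shmctl(IPC_STAT) reports the segment size, attach count, etc.
        struct shmid_ds stats{};
        shmctl(shm_id, IPC_STAT, &stats);
        std::cout << "shm_id=" << shm_id << " bytes=" << stats.shm_segsz << std::endl;

        // Mark for removal, then detach: the kernel frees the segment once the
        // last attachment is gone (the "release on deallocate" behavior above).
        shmctl(shm_id, IPC_RMID, nullptr);
        shmdt(addr);
        return 0;
    }
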
FILE: trtlab/core/src/memory/tensor_shape.cc
type trtlab (line 31) | namespace trtlab {
FILE: trtlab/core/src/types.cc
type trtlab (line 31) | namespace trtlab {
type types (line 32) | namespace types {
function dtype (line 46) | dtype& dtype::operator=(dtype&& other) noexcept
function dtype (line 54) | dtype& dtype::operator=(const dtype& other)
function DLDataType (line 77) | const DLDataType& dtype::to_dlpack() const { return m_DLPackType; }
FILE: trtlab/core/src/utils.cc
type trtlab (line 37) | namespace trtlab {
function BytesToString (line 44) | std::string BytesToString(size_t bytes)
function StringToBytes (line 60) | std::uint64_t StringToBytes(const std::string str)
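
BytesToString and StringToBytes convert between raw byte counts and human-readable strings. The exact suffixes and rounding the repo uses are not visible from this index, so the sketch below simply assumes binary (1024-based) units:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <iostream>
    #include <string>

    // Render a byte count with a binary-unit suffix, e.g. 16777216 -> "16.0 MiB".
    static std::string BytesToString(std::size_t bytes)
    {
        const char* units[] = {"B", "KiB", "MiB", "GiB", "TiB"};
        double value = static_cast<double>(bytes);
        int unit = 0;
        while (value >= 1024.0 && unit < 4)
        {
            value /= 1024.0;
            ++unit;
        }
        char buffer[32];
        std::snprintf(buffer, sizeof(buffer), "%.1f %s", value, units[unit]);
        return buffer;
    }

    // Parse "16.0 MiB" back into a byte count (inverse of the above).
    static std::uint64_t StringToBytes(const std::string& str)
    {
        double value = 0.0;
        char unit[8] = {0};
        std::sscanf(str.c_str(), "%lf %7s", &value, unit);
        std::uint64_t scale = 1;
        const std::string u(unit);
        if (u == "KiB") scale = 1ull << 10;
        else if (u == "MiB") scale = 1ull << 20;
        else if (u == "GiB") scale = 1ull << 30;
        else if (u == "TiB") scale = 1ull << 40;
        return static_cast<std::uint64_t>(value * scale);
    }

    int main()
    {
        std::cout << BytesToString(16 << 20) << std::endl;    // 16.0 MiB
        std::cout << StringToBytes("16.0 MiB") << std::endl;  // 16777216
        return 0;
    }
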
FILE: trtlab/core/tests/test_affinity.cc
class TestAffinity (line 35) | class TestAffinity : public ::testing::Test {}
function TEST_F (line 37) | TEST_F(TestAffinity, Basics)
function TEST_F (line 64) | TEST_F(TestAffinity, IntString)
FILE: trtlab/core/tests/test_async.cc
class TestAsync (line 5) | class TestAsync : public ::testing::Test
function TEST_F (line 9) | TEST_F(TestAsync, FibersHello)
FILE: trtlab/core/tests/test_async_compute.cc
class TestAsyncCompute (line 36) | class TestAsyncCompute : public ::testing::Test
function TEST_F (line 40) | TEST_F(TestAsyncCompute, EvenTest)
function TEST_F (line 59) | TEST_F(TestAsyncCompute, OddTest)
function TEST_F (line 71) | TEST_F(TestAsyncCompute, ReturnUniquePtr)
function TEST_F (line 86) | TEST_F(TestAsyncCompute, ReturnVoid)
function TEST_F (line 95) | TEST_F(TestAsyncCompute, PackagedTask)
FILE: trtlab/core/tests/test_batcher.cc
class TestBatcher (line 11) | class TestBatcher : public ::testing::Test
function TEST_F (line 17) | TEST_F(TestBatcher, StandardBatcher)
function TEST_F (line 49) | TEST_F(TestBatcher, FullBatcher)
function TEST_F (line 93) | TEST_F(TestBatcher, FullBatcherUserThreads)
function TEST_F (line 139) | TEST_F(TestBatcher, ShortDeferredTaskPool)
FILE: trtlab/core/tests/test_common.cc
class TestCore (line 79) | class TestCore : public ::testing::Test {}
function TEST_F (line 81) | TEST_F(TestCore, FindRanges0)
function TEST_F (line 91) | TEST_F(TestCore, FindRanges1)
function TEST_F (line 100) | TEST_F(TestCore, FindRanges2)
function TEST_F (line 109) | TEST_F(TestCore, FindRanges3)
function TEST_F (line 118) | TEST_F(TestCore, FindRanges4)
function TEST_F (line 127) | TEST_F(TestCore, FindRanges5)
FILE: trtlab/core/tests/test_common.h
function class (line 37) | class TrackedTest : public ::testing::Test
type log_tracker (line 47) | struct log_tracker
function exception (line 62) | struct timeout_error : std::exception
FILE: trtlab/core/tests/test_cyclic_allocator.cc
class TestCyclicStacks (line 39) | class TestCyclicStacks : public ::testing::Test
function TYPED_TEST (line 47) | TYPED_TEST(TestCyclicStacks, EmptyOnCreate)
function TYPED_TEST (line 54) | TYPED_TEST(TestCyclicStacks, AddSegment)
function TYPED_TEST (line 61) | TYPED_TEST(TestCyclicStacks, DropSegment)
function TYPED_TEST (line 68) | TYPED_TEST(TestCyclicStacks, Allocate)
function TYPED_TEST (line 113) | TYPED_TEST(TestCyclicStacks, AllocateThenReleaseStack)
function TYPED_TEST (line 134) | TYPED_TEST(TestCyclicStacks, AllocateShouldFail)
FILE: trtlab/core/tests/test_cyclic_windowed_buffer.cc
class TestWindowedBuffer (line 45) | class TestWindowedBuffer : public ::testing::Test
function TEST_F (line 49) | TEST_F(TestWindowedBuffer, SynchronousNonOverlapping)
function TEST_F (line 87) | TEST_F(TestWindowedBuffer, SynchronousOverlapping)
function SyncFnFromFuture (line 128) | std::function<bool(bool)> SyncFnFromFuture(std::shared_future<T>&& shared)
function TEST_F (line 145) | TEST_F(TestWindowedBuffer, AsynchronousNonOverlapping)
function TEST_F (line 199) | TEST_F(TestWindowedBuffer, AsynchronousOverlapping)
type test_cw_stack (line 256) | struct test_cw_stack : public ::trtlab::cyclic_windowed_stack<memory::ho...
function TEST_F (line 266) | TEST_F(TestWindowedBuffer, Stack)
function TEST_F (line 336) | TEST_F(TestWindowedBuffer, Reservation)
FILE: trtlab/core/tests/test_foo_memory.cc
function make_vector (line 54) | auto make_vector(RawAllocator&& alloc)
function make_vector (line 60) | auto make_vector(memory::trtlab::allocator<RawAllocator, Mutex> alloc)
class TestFooMemory (line 65) | class TestFooMemory : public TrackedTest {}
function TEST_F (line 67) | TEST_F(TestFooMemory, BlankTest)
function TEST_F (line 73) | TEST_F(TestFooMemory, Malloc)
function TEST_F (line 89) | TEST_F(TestFooMemory, MallocTraits)
function TEST_F (line 126) | TEST_F(TestFooMemory, MallocAsStdAllocator)
function TEST_F (line 157) | TEST_F(TestFooMemory, MallocThreadSafe)
function TEST_F (line 188) | TEST_F(TestFooMemory, GrowthCappedBlockAllocator)
function TEST_F (line 228) | TEST_F(TestFooMemory, BlockArena)
function make_raw_transactional_allocator (line 286) | auto make_raw_transactional_allocator(std::size_t block_size, std::size_...
function make_smart_transactional_allocator (line 308) | auto make_smart_transactional_allocator(std::size_t block_size, std::siz...
function TEST_F (line 321) | TEST_F(TestFooMemory, TransactionalLifeCycle)
function TEST_F (line 362) | TEST_F(TestFooMemory, TransactionalAllocatorFullyCaptured)
function TEST_F (line 377) | TEST_F(TestFooMemory, SmartTransactionalLifeCycle)
function TEST_F (line 395) | TEST_F(TestFooMemory, SmartTransactionalBase)
function TEST_F (line 405) | TEST_F(TestFooMemory, SmartTransactionalDescriptor)
function TEST_F (line 445) | TEST_F(TestFooMemory, AlignedSmartAllocator)
function TEST_F (line 488) | TEST_F(TestFooMemory, SmartAllocatorStateful)
function TEST_F (line 642) | TEST_F(TestFooMemory, SmartAllocatorStateless)
type backend (line 676) | struct backend {}
type backend_a (line 677) | struct backend_a : backend {}
type backend_b (line 678) | struct backend_b : backend_a {}
type backend_c (line 679) | struct backend_c : backend {}
class md (line 682) | class md : public BackendType
method md (line 685) | md() : m_ptr(nullptr) {}
method md (line 686) | md(void* ptr) : m_ptr(ptr) {}
function do_a (line 694) | void do_a(md<BackendType>& require_a_or_derived_from_a)
function TEST_F (line 701) | TEST_F(TestFooMemory, TemplateInheritance)
type pinned_memory (line 717) | struct pinned_memory : memory::host_memory
method DLDeviceType (line 719) | static constexpr DLDeviceType device_type() { return kDLCPUPinned; }
function TEST_F (line 724) | TEST_F(TestFooMemory, OtherMemoryTypes)
type malloc_allocator_1024 (line 743) | struct malloc_allocator_1024 : public memory::malloc_allocator
method min_alignment (line 745) | constexpr static std::size_t min_alignment() { return 1024UL; }
type malloc_allocator_1 (line 748) | struct malloc_allocator_1 : public memory::malloc_allocator
method min_alignment (line 750) | constexpr static std::size_t min_alignment() { return 1UL; }
function TEST_F (line 753) | TEST_F(TestFooMemory, MinAlignment)
function TEST_F (line 770) | TEST_F(TestFooMemory, FirstTouch)
FILE: trtlab/core/tests/test_main.cc
function main (line 31) | int main(int argc, char **argv) {
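
The test_main.cc files under trtlab/*/tests provide the GoogleTest entry point. A minimal sketch of what such a main contains (the repo's version may also initialize glog/gflags, which is omitted here):

    #include <gtest/gtest.h>

    int main(int argc, char** argv)
    {
        // Hand --gtest_* flags to GoogleTest, then run every TEST/TEST_F
        // registered in this binary and return a non-zero code on failure.
        ::testing::InitGoogleTest(&argc, argv);
        return RUN_ALL_TESTS();
    }
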
FILE: trtlab/core/tests/test_memory.cc
class TestMemory (line 7) | class TestMemory : public ::testing::Test {}
function TEST_F (line 11) | TEST_F(TestMemory, FirstTouchAllocator)
FILE: trtlab/core/tests/test_memory_old.cc
class TestMemory (line 61) | class TestMemory : public ::testing::Test
function TYPED_TEST (line 76) | TYPED_TEST(TestMemory, make_shared)
function TYPED_TEST (line 93) | TYPED_TEST(TestMemory, make_unique)
function TYPED_TEST (line 110) | TYPED_TEST(TestMemory, ctor)
function TYPED_TEST (line 124) | TYPED_TEST(TestMemory, move_ctor)
function TYPED_TEST (line 139) | TYPED_TEST(TestMemory, move_ctor_with_reshape)
function TYPED_TEST (line 171) | TYPED_TEST(TestMemory, move_to_shared_ptr)
function TYPED_TEST (line 180) | TYPED_TEST(TestMemory, smart_move)
function TYPED_TEST (line 198) | TYPED_TEST(TestMemory, shape)
function TYPED_TEST (line 248) | TYPED_TEST(TestMemory, alignment)
class DescFromSharedPointer (line 269) | class DescFromSharedPointer : public Descriptor<MemoryType>
method DescFromSharedPointer (line 272) | DescFromSharedPointer(std::shared_ptr<MemoryType> shared)
method CaptureSharedObject (line 282) | std::function<void()> CaptureSharedObject()
function TYPED_TEST (line 291) | TYPED_TEST(TestMemory, DescriptorFromSharedPointer)
class TestSystemVMemory (line 327) | class TestSystemVMemory : public ::testing::Test
function TEST_F (line 331) | TEST_F(TestSystemVMemory, same_process)
function TEST_F (line 361) | TEST_F(TestSystemVMemory, smart_ptrs)
function TEST_F (line 383) | TEST_F(TestSystemVMemory, TryAttachingToDeletedSegment)
FILE: trtlab/core/tests/test_memory_stack.cc
class TestMemoryStack (line 43) | class TestMemoryStack : public ::testing::Test
method SetUp (line 46) | virtual void SetUp() { stack = std::make_shared<MemoryStack<Malloc>>(o...
method TearDown (line 48) | virtual void TearDown() { stack->Reset(); }
class TestSmartStack (line 53) | class TestSmartStack : public ::testing::Test
method SetUp (line 56) | virtual void SetUp() { stack = SmartStack<SystemV>::Create(one_mb); }
method TearDown (line 58) | virtual void TearDown()
function TEST_F (line 69) | TEST_F(TestMemoryStack, EmptyOnCreate)
function TEST_F (line 76) | TEST_F(TestMemoryStack, AllocateAndReset)
function TEST_F (line 87) | TEST_F(TestMemoryStack, Unaligned)
function TEST_F (line 104) | TEST_F(TestSmartStack, EmptyOnCreate)
function TEST_F (line 111) | TEST_F(TestSmartStack, AllocateAndReset)
function TEST_F (line 123) | TEST_F(TestSmartStack, Unaligned)
function TEST_F (line 155) | TEST_F(TestSmartStack, PassMemory)
function TEST_F (line 161) | TEST_F(TestSmartStack, PassSpecializedMemory)
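
The MemoryStack/SmartStack tests above exercise a bump-pointer stack: Allocate advances an offset in fixed-granularity steps and Reset rewinds everything at once. A minimal sketch of such a stack, assuming a 64-byte allocation granularity for illustration (the real classes also guarantee pointer alignment and are built over the library's allocators):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Minimal bump-pointer stack: Allocate carves chunks out of one contiguous
    // buffer by advancing an offset; Reset reclaims the whole buffer in O(1).
    class MemoryStack
    {
    public:
        explicit MemoryStack(std::size_t capacity, std::size_t granularity = 64)
            : m_Buffer(capacity), m_Offset(0), m_Granularity(granularity)
        {
        }

        void* Allocate(std::size_t size)
        {
            // Round each request up to the stack's granularity (a power of two).
            std::size_t rounded = (size + m_Granularity - 1) & ~(m_Granularity - 1);
            if (m_Offset + rounded > m_Buffer.size())
                return nullptr;  // out of space; the real stack would throw or log
            void* ptr = m_Buffer.data() + m_Offset;
            m_Offset += rounded;
            return ptr;
        }

        void Reset() { m_Offset = 0; }

        std::size_t Allocated() const { return m_Offset; }
        std::size_t Available() const { return m_Buffer.size() - m_Offset; }

    private:
        std::vector<std::uint8_t> m_Buffer;
        std::size_t m_Offset;
        std::size_t m_Granularity;
    };

    int main()
    {
        MemoryStack stack(1 << 20);      // one_mb, as in the tests above
        void* a = stack.Allocate(100);   // rounded up to 128 bytes
        void* b = stack.Allocate(1024);
        assert(a && b && stack.Allocated() == 128 + 1024);
        stack.Reset();                   // everything reclaimed at once
        assert(stack.Allocated() == 0);
        return 0;
    }
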
FILE: trtlab/core/tests/test_pool.cc
type Object (line 33) | struct Object
method Object (line 35) | Object(std::string name) : m_Name(name), m_Original(name) {}
method Object (line 41) | Object(Object&& other) noexcept = default;
method Object (line 42) | Object& operator=(Object&& other) noexcept = default;
method SetName (line 44) | void SetName(std::string name)
method GetName (line 48) | const std::string GetName() const
method Reset (line 53) | void Reset()
type ShareableObject (line 63) | struct ShareableObject : public Object, public std::enable_shared_from_t...
method Copy (line 66) | auto Copy() { return shared_from_this(); }
class TestPool (line 70) | class TestPool : public ::testing::Test
method SetUp (line 73) | virtual void SetUp()
method TearDown (line 85) | virtual void TearDown() {}
function TEST_F (line 92) | TEST_F(TestPool, EmptyOnCreate)
function TEST_F (line 97) | TEST_F(TestPool, Push)
function TEST_F (line 105) | TEST_F(TestPool, Pop)
function TEST_F (line 122) | TEST_F(TestPool, PopOnReturn)
function TEST_F (line 148) | TEST_F(TestPool, PopOnReturnWithCapture)
function TEST_F (line 189) | TEST_F(TestPool, PopWithoutReturn)
FILE: trtlab/core/tests/test_stl_allocator.cc
class TestCustomAllocator (line 15) | class TestCustomAllocator : public ::testing::Test
function TEST_F (line 22) | TEST_F(TestCustomAllocator, CustomVector)
FILE: trtlab/core/tests/test_sysv_allocator.cc
class TestSysV (line 42) | class TestSysV : public TrackedTest
function TEST_F (line 46) | TEST_F(TestSysV, BlankTest)
function TEST_F (line 53) | TEST_F(TestSysV, LifeCycle)
function TEST_F (line 64) | TEST_F(TestSysV, Attach)
function TEST_F (line 93) | TEST_F(TestSysV, AttachShouldFailIfSegmentHasBeenReleased)
function TEST_F (line 120) | TEST_F(TestSysV, AsBaseForHighLevelAllocators)
FILE: trtlab/core/tests/test_tensor.cc
class TestTensor (line 39) | class TestTensor : public ::testing::Test
function TEST_F (line 43) | TEST_F(TestTensor, StateFromBytes)
function TEST_F (line 75) | TEST_F(TestTensor, ReshapeView)
function TEST_F (line 121) | TEST_F(TestTensor, Shapes1D)
function TEST_F (line 133) | TEST_F(TestTensor, ShapesNDGeneric)
function TEST_F (line 147) | TEST_F(TestTensor, ShapesNDGenericWithStrides)
function TEST_F (line 159) | TEST_F(TestTensor, ShapesEmpty)
FILE: trtlab/core/tests/test_thread_pool.cc
class TestThreadPool (line 33) | class TestThreadPool : public ::testing::Test
method SetUp (line 36) | virtual void SetUp() { thread_pool = std::make_shared<ThreadPool>(3); }
method TearDown (line 38) | virtual void TearDown() {}
function TEST_F (line 43) | TEST_F(TestThreadPool, ReturnInt)
function TEST_F (line 49) | TEST_F(TestThreadPool, ReturnChainedInt)
function TEST_F (line 56) | TEST_F(TestThreadPool, MakeUnique) { auto unqiue = std::make_unique<Thre...
function TEST_F (line 58) | TEST_F(TestThreadPool, CaptureThis)
FILE: trtlab/core/tests/test_transactional_allocator.h
function tracked (line 52) | auto tracked = memory::make_tracked_allocator(log_tracker{"** tracker: b...
FILE: trtlab/core/tests/test_types.cc
class TestTypes (line 35) | class TestTypes : public ::testing::Test
function TEST_F (line 39) | TEST_F(TestTypes, int8)
function TEST_F (line 46) | TEST_F(TestTypes, uint8)
function TEST_F (line 53) | TEST_F(TestTypes, fp32)
function TEST_F (line 60) | TEST_F(TestTypes, ctors_and_assignment)
function TEST_F (line 85) | TEST_F(TestTypes, Equivalence)
function TEST_F (line 101) | TEST_F(TestTypes, TypeVsObject)
function TEST_F (line 106) | TEST_F(TestTypes, ArbituaryDLDataTypes)
function TEST_F (line 135) | TEST_F(TestTypes, CheckAllForCompatibility)
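
types.cc's dtype wraps a DLPack DLDataType (to_dlpack returns the underlying descriptor built from a type code, bit width, and lane count). A minimal sketch of that mapping; DLDataType and the type-code enum are redeclared locally with DLPack's layout so the snippet stands alone (the repo includes the real dlpack.h instead).

    #include <cstddef>
    #include <cstdint>
    #include <iostream>

    // Local redeclaration of the DLPack descriptor so the snippet is self-contained.
    typedef enum { kDLInt = 0U, kDLUInt = 1U, kDLFloat = 2U } DLDataTypeCode;

    typedef struct
    {
        std::uint8_t  code;   // kDLInt / kDLUInt / kDLFloat
        std::uint8_t  bits;   // width of one lane in bits
        std::uint16_t lanes;  // vector lanes, 1 for scalars
    } DLDataType;

    // Minimal analog of trtlab::types::dtype: wrap a DLDataType and derive sizes.
    class dtype
    {
    public:
        constexpr dtype(DLDataTypeCode code, std::uint8_t bits)
            : m_DLPackType{static_cast<std::uint8_t>(code), bits, 1}
        {
        }

        const DLDataType& to_dlpack() const { return m_DLPackType; }
        std::size_t bytes() const { return (m_DLPackType.bits * m_DLPackType.lanes + 7) / 8; }

    private:
        DLDataType m_DLPackType;
    };

    int main()
    {
        constexpr dtype fp32(kDLFloat, 32);
        constexpr dtype int8(kDLInt, 8);
        std::cout << "fp32: code=" << int(fp32.to_dlpack().code) << " bytes=" << fp32.bytes() << std::endl;
        std::cout << "int8: code=" << int(int8.to_dlpack().code) << " bytes=" << int8.bytes() << std::endl;
        return 0;
    }
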
FILE: trtlab/cuda/benchmarks/bench_cuda_memory.cc
type bench (line 39) | namespace bench
function run_allocate_node_x1 (line 43) | static void run_allocate_node_x1(benchmark::State& state, Allocator& a...
function run_allocate_node_x10 (line 54) | static void run_allocate_node_x10(benchmark::State& state, Allocator& ...
function run_allocate_descriptor_x1 (line 94) | static void run_allocate_descriptor_x1(benchmark::State& state, Alloca...
function run_allocate_descriptor_x10 (line 103) | static void run_allocate_descriptor_x10(benchmark::State& state, Alloc...
function make_transactional_allocator (line 120) | static auto make_transactional_allocator()
function memory_cuda_malloc_raw_x1 (line 139) | static void memory_cuda_malloc_raw_x1(benchmark::State& state)
function memory_cuda_malloc_raw_x10 (line 146) | static void memory_cuda_malloc_raw_x10(benchmark::State& state)
function memory_transactional_raw_x1 (line 155) | static void memory_transactional_raw_x1(benchmark::State& state)
function memory_transactional_raw_x10 (line 161) | static void memory_transactional_raw_x10(benchmark::State& state)
function memory_transactional_allocator_x1 (line 167) | static void memory_transactional_allocator_x1(benchmark::State& state)
function memory_transactional_allocator_x10 (line 174) | static void memory_transactional_allocator_x10(benchmark::State& state)
function memory_transactional_descriptor_x1 (line 181) | static void memory_transactional_descriptor_x1(benchmark::State& state)
function memory_transactional_descriptor_x10 (line 188) | static void memory_transactional_descriptor_x10(benchmark::State& state)
FILE: trtlab/cuda/include/trtlab/cuda/cyclic_windowed_buffer.h
function namespace (line 8) | namespace trtlab
FILE: trtlab/cuda/include/trtlab/cuda/device_guard.h
function namespace (line 32) | namespace trtlab
FILE: trtlab/cuda/include/trtlab/cuda/device_info.h
function namespace (line 33) | namespace trtlab
FILE: trtlab/cuda/include/trtlab/cuda/memory/cuda_allocators.h
function namespace (line 38) | namespace trtlab
function deallocate_node (line 113) | static void deallocate_node(void* ptr, std::size_t, std::size_t) noexcept
function allocator_info (line 119) | static allocator_info info()
function namespace (line 130) | namespace cuda_detail
FILE: trtlab/cuda/include/trtlab/cuda/memory/device_memory.h
function namespace (line 32) | namespace trtlab
FILE: trtlab/cuda/include/trtlab/cuda/sync.h
function namespace (line 11) | namespace trtlab
FILE: trtlab/cuda/src/copy.cc
function Copy (line 40) | void Copy(CoreMemory& dst, size_t dst_offset, const CoreMemory& src, siz...
type trtlab (line 49) | namespace trtlab {
function Copy (line 51) | void Copy(HostMemory& dst, size_t dst_offset, const DeviceMemory& src,...
function Copy (line 57) | void Copy(DeviceMemory& dst, size_t dst_offset, const HostMemory& src,...
function Copy (line 63) | void Copy(DeviceMemory& dst, size_t dst_offset, const DeviceMemory& sr...
FILE: trtlab/cuda/src/cuda_allocators.cc
type trtlab (line 29) | namespace trtlab
type memory (line 31) | namespace memory
type trtlab (line 33) | namespace trtlab
type cuda_detail (line 35) | namespace cuda_detail
FILE: trtlab/cuda/src/device_guard.cc
type trtlab (line 34) | namespace trtlab {
FILE: trtlab/cuda/src/device_info.cc
type nvmlState (line 41) | struct nvmlState
method nvmlState (line 43) | nvmlState()
function nvmlDevice_t (line 55) | nvmlDevice_t GetHandleById(unsigned int device_id)
type trtlab (line 64) | namespace trtlab
function cpu_set (line 66) | cpu_set DeviceInfo::Affinity(int device_id)
type cudaDeviceProp (line 89) | struct cudaDeviceProp
function nvmlMemory_t (line 127) | nvmlMemory_t DeviceInfo::MemoryInfo(int device_id)
type cuda (line 134) | namespace cuda
type nvml (line 136) | namespace nvml
function device_count (line 138) | std::size_t device_count()
function nvmlMemory_t (line 143) | nvmlMemory_t memory_info(int device_id)
FILE: trtlab/cuda/tests/test_allocators.cc
class TestCudaAllocators (line 46) | class TestCudaAllocators : public ::testing::Test
function TEST_F (line 50) | TEST_F(TestCudaAllocators, cudaMalloc)
function TEST_F (line 64) | TEST_F(TestCudaAllocators, TrasactionalCudaMalloc)
FILE: trtlab/cuda/tests/test_device_info.cc
class TestDeviceInfo (line 33) | class TestDeviceInfo : public ::testing::Test
method SetUp (line 36) | virtual void SetUp() {}
method TearDown (line 38) | virtual void TearDown() {}
function TEST_F (line 41) | TEST_F(TestDeviceInfo, Affinity)
FILE: trtlab/cuda/tests/test_main.cc
function main (line 31) | int main(int argc, char **argv) {
FILE: trtlab/cuda/tests/test_memory.cc
class TestMemory (line 45) | class TestMemory : public ::testing::Test
function TYPED_TEST (line 53) | TYPED_TEST(TestMemory, make_shared)
function TYPED_TEST (line 72) | TYPED_TEST(TestMemory, make_unique)
function TYPED_TEST (line 81) | TYPED_TEST(TestMemory, ctor)
FILE: trtlab/memory/benchmarks/bench_memory.cc
type bench (line 38) | namespace bench
function run_allocate_node_x1 (line 41) | static void run_allocate_node_x1(benchmark::State& state, Allocator& a...
function run_allocate_node_x10 (line 54) | static void run_allocate_node_x10(benchmark::State& state, Allocator& ...
function run_allocate_descriptor_x1 (line 87) | static void run_allocate_descriptor_x1(benchmark::State& state, Alloca...
function run_allocate_descriptor_x10 (line 98) | static void run_allocate_descriptor_x10(benchmark::State& state, Alloc...
function make_transactional_allocator (line 117) | static auto make_transactional_allocator()
function make_bfit_allocator (line 133) | static auto make_bfit_allocator()
function memory_malloc_raw_x1 (line 144) | static void memory_malloc_raw_x1(benchmark::State& state)
function memory_malloc_raw_x10 (line 150) | static void memory_malloc_raw_x10(benchmark::State& state)
function memory_transactional_raw_x1 (line 158) | static void memory_transactional_raw_x1(benchmark::State& state)
function memory_transactional_raw_x10 (line 164) | static void memory_transactional_raw_x10(benchmark::State& state)
function memory_transactional_allocator_x1 (line 170) | static void memory_transactional_allocator_x1(benchmark::State& state)
function memory_transactional_allocator_x10 (line 177) | static void memory_transactional_allocator_x10(benchmark::State& state)
function memory_transactional_descriptor_x1 (line 184) | static void memory_transactional_descriptor_x1(benchmark::State& state)
function memory_transactional_descriptor_x10 (line 191) | static void memory_transactional_descriptor_x10(benchmark::State& state)
function memory_bfit_raw_x10 (line 198) | static void memory_bfit_raw_x10(benchmark::State& state)
function memory_bfit_allocator_x10 (line 204) | static void memory_bfit_allocator_x10(benchmark::State& state)
function memory_bfit_descriptor_x10 (line 211) | static void memory_bfit_descriptor_x10(benchmark::State& state)
FILE: trtlab/memory/benchmarks/bench_memory_pool.cc
function memory_pool_map_default (line 41) | static void memory_pool_map_default(benchmark::State& state)
function memory_pool_map_malloc_raw (line 56) | static void memory_pool_map_malloc_raw(benchmark::State& state)
function make_map (line 75) | auto make_map(BlockAllocator&& block_alloc)
function memory_pool_map_malloc_pooled (line 90) | static void memory_pool_map_malloc_pooled(benchmark::State& state)
FILE: trtlab/memory/include/trtlab/memory/align.h
function namespace (line 19) | namespace trtlab
FILE: trtlab/memory/include/trtlab/memory/allocator.h
function namespace (line 35) | namespace trtlab
function deallocate_node (line 206) | void deallocate_node(void* ptr, std::size_t size, std::size_t alignment)...
function deallocate_array (line 212) | void deallocate_array(void* ptr, std::size_t count, std::size_t size, st...
function DLContext (line 218) | DLContext device_context() const
function noexcept (line 262) | auto use_count() const noexcept
FILE: trtlab/memory/include/trtlab/memory/allocator_storage.h
function namespace (line 24) | namespace trtlab
function deallocate_node (line 193) | void deallocate_node(void* ptr, std::size_t size, std::size_t alignment)...
function deallocate_array (line 202) | void deallocate_array(void* ptr, std::size_t count, std::size_t size, st...
function try_deallocate_node (line 277) | TRTLAB_ENABLE_IF(composable::value)
function try_deallocate_array (line 286) | TRTLAB_ENABLE_IF(composable::value)
function get_allocator (line 300) | auto get_allocator() noexcept -> decltype(std::declval<storage_policy>()...
function get_allocator (line 305) | auto get_allocator() const noexcept -> decltype(std::declval<const stora...
function is_composable (line 333) | bool is_composable() const noexcept
type any_allocator (line 356) | struct any_allocator
function allocator_type (line 397) | const allocator_type& get_allocator() const noexcept
function namespace (line 460) | namespace detail
function storage (line 597) | reference_storage(const allocator_type& alloc) noexcept : storage(alloc) {}
function explicit (line 613) | explicit operator bool() const noexcept
function class (line 641) | class base_allocator
function noexcept (line 836) | const noexcept
function clone (line 841) | void clone(void* storage) const noexcept override
function deallocate_impl (line 855) | void deallocate_impl(void* ptr, std::size_t count, std::size_t size, std...
function try_deallocate_impl (line 873) | bool try_deallocate_impl(void* ptr, std::size_t count, std::size_t size,...
FILE: trtlab/memory/include/trtlab/memory/allocator_traits.h
function namespace (line 29) | namespace trtlab
function max_node_size (line 244) | size_t max_node_size(min_concept, const Allocator&) noexcept
function max_array_size (line 256) | size_t max_array_size(min_concept, const Allocator& alloc)
function memory_type (line 266) | memory_type(error)
function min_alignment (line 280) | size_t min_alignment(min_concept, const Allocator&)
function max_alignment (line 292) | size_t max_alignment(min_concept, const Allocator& alloc)
function deallocate_node (line 345) | static void deallocate_node(allocator_type& state, void* node, std::size...
function deallocate_array (line 352) | static void deallocate_array(allocator_type& state, void* array, std::si...
function deallocate_array (line 360) | static void deallocate_array(allocator_type& state, void* array, std::si...
function max_node_size (line 368) | static std::size_t max_node_size(const allocator_type& state)
function max_array_size (line 375) | static std::size_t max_array_size(const allocator_type& state)
function min_alignment (line 382) | static std::size_t min_alignment(const allocator_type& state)
function max_alignment (line 389) | static std::size_t max_alignment(const allocator_type& state)
function DLContext (line 396) | static DLContext device_context(const allocator_type& state)
function namespace (line 408) | namespace detail
function namespace (line 453) | namespace traits_detail
function try_deallocate_node (line 540) | static bool try_deallocate_node(allocator_type& state, void* node, std::...
function try_deallocate_array (line 546) | static bool try_deallocate_array(allocator_type& state, void* array, std...
function namespace (line 558) | namespace detail
FILE: trtlab/memory/include/trtlab/memory/bfit_allocator.h
function namespace (line 16) | namespace bfit_detail
function allow_growth (line 112) | bool allow_growth() const;
FILE: trtlab/memory/include/trtlab/memory/block_allocators.h
function namespace (line 17) | namespace trtlab
function typename (line 493) | typename... Args>
FILE: trtlab/memory/include/trtlab/memory/block_arena.h
function namespace (line 38) | namespace trtlab
function take_from_cache (line 147) | bool take_from_cache(memory_block& block) noexcept
function memory_block (line 216) | memory_block allocate_block()
function deallocate_block (line 226) | void deallocate_block(memory_block block)
function shrink_to_fit (line 245) | void shrink_to_fit() noexcept
function reserve_blocks (line 250) | void reserve_blocks(std::size_t block_count) noexcept
function DLContext (line 255) | DLContext device_context() const noexcept
function allocator_type (line 267) | const allocator_type& get_allocator() const noexcept
function block_allocator_type (line 277) | const block_allocator_type& get_block_allocator() const noexcept
FILE: trtlab/memory/include/trtlab/memory/block_manager.h
function namespace (line 37) | namespace trtlab
FILE: trtlab/memory/include/trtlab/memory/block_stack.h
function namespace (line 20) | namespace trtlab
FILE: trtlab/memory/include/trtlab/memory/debugging.h
function namespace (line 15) | namespace memory
FILE: trtlab/memory/include/trtlab/memory/deleter.h
function namespace (line 19) | namespace trtlab
function noexcept (line 146) | void operator()(value_type* pointer) noexcept
function get_allocator (line 153) | auto get_allocator() const noexcept
function noexcept (line 192) | void operator()(value_type* pointer) noexcept
function get_allocator (line 200) | auto get_allocator() const noexcept
function noexcept (line 234) | void operator()(value_type* pointer) noexcept
function get_allocator (line 245) | auto get_allocator() const noexcept
function noexcept (line 287) | void operator()(value_type* pointer) noexcept
function get_allocator (line 295) | auto get_allocator() const noexcept
FILE: trtlab/memory/include/trtlab/memory/descriptor.h
function namespace (line 34) | namespace trtlab
FILE: trtlab/memory/include/trtlab/memory/detail/block_list.h
function namespace (line 13) | namespace trtlab
FILE: trtlab/memory/include/trtlab/memory/detail/debug_helpers.h
function namespace (line 13) | namespace trtlab
FILE: trtlab/memory/include/trtlab/memory/detail/free_list.h
function namespace (line 15) | namespace trtlab
FILE: trtlab/memory/include/trtlab/memory/detail/memory_stack.h
function namespace (line 15) | namespace trtlab
FILE: trtlab/memory/include/trtlab/memory/detail/page_info.h
type page_info (line 43) | typedef struct {
type page_info_array (line 65) | typedef struct {
type flag_count (line 74) | typedef struct {
FILE: trtlab/memory/include/trtlab/memory/detail/ranges.h
function namespace (line 33) | namespace trtlab
FILE: trtlab/memory/include/trtlab/memory/detail/utility.h
function namespace (line 18) | namespace trtlab
FILE: trtlab/memory/include/trtlab/memory/error.h
function namespace (line 18) | namespace trtlab
function class (line 68) | class out_of_memory : public std::bad_alloc
function class (line 124) | class out_of_fixed_memory : public out_of_memory
function class (line 150) | class bad_allocation_size : public std::bad_alloc
function class (line 214) | class bad_node_size : public bad_allocation_size
function class (line 232) | class bad_array_size : public bad_allocation_size
function class (line 250) | class bad_alignment : public bad_allocation_size
function namespace (line 265) | namespace detail
FILE: trtlab/memory/include/trtlab/memory/huge_page_allocator.h
function namespace (line 5) | namespace trtlab
FILE: trtlab/memory/include/trtlab/memory/literals.h
function namespace (line 40) | namespace trtlab
FILE: trtlab/memory/include/trtlab/memory/malloc_allocator.h
function namespace (line 35) | namespace trtlab
FILE: trtlab/memory/include/trtlab/memory/memory_block.h
function namespace (line 12) | namespace trtlab
FILE: trtlab/memory/include/trtlab/memory/memory_pool.h
function namespace (line 21) | namespace trtlab
FILE: trtlab/memory/include/trtlab/memory/memory_resource.h
function namespace (line 9) | namespace trtlab {
function class (line 156) | class raii_memory_resource
function class (line 166) | class device_context_interface
FILE: trtlab/memory/include/trtlab/memory/memory_type.h
function namespace (line 36) | namespace trtlab
FILE: trtlab/memory/include/trtlab/memory/memory_typed_allocator.h
function namespace (line 35) | namespace trtlab
FILE: trtlab/memory/include/trtlab/memory/posix_aligned_allocator.h
function namespace (line 9) | namespace trtlab
FILE: trtlab/memory/include/trtlab/memory/raii_allocator.h
function namespace (line 35) | namespace trtlab
FILE: trtlab/memory/include/trtlab/memory/smart_ptr.h
function namespace (line 21) | namespace trtlab
FILE: trtlab/memory/include/trtlab/memory/std_allocator.h
function namespace (line 23) | namespace trtlab
FILE: trtlab/memory/include/trtlab/memory/threading.h
function namespace (line 20) | namespace trtlab
FILE: trtlab/memory/include/trtlab/memory/trackers.h
function namespace (line 6) | namespace trtlab
FILE: trtlab/memory/include/trtlab/memory/tracking.h
function namespace (line 17) | namespace trtlab
FILE: trtlab/memory/include/trtlab/memory/transactional_allocator.h
function namespace (line 43) | namespace trtlab
FILE: trtlab/memory/include/trtlab/memory/utils.h
function namespace (line 5) | namespace trtlab
FILE: trtlab/memory/src/descriptor.cc
function DLContext (line 22) | DLContext descriptor::device_context() const
FILE: trtlab/memory/src/detail/block_list.cc
function block_list (line 26) | block_list& block_list::operator=(block_list&& other) noexcept
function memory_block (line 52) | memory_block block_list::allocate() noexcept
function block_list_oob (line 79) | block_list_oob& block_list_oob::operator=(block_list_oob&& other) noexcept
function memory_block (line 99) | memory_block block_list_oob::allocate() noexcept
FILE: trtlab/memory/src/detail/free_list.cc
type interval (line 21) | struct interval
method size (line 29) | std::size_t size(std::size_t node_size) const noexcept
function interval (line 43) | interval list_search_array(char* first, std::size_t bytes_needed, std::s...
function interval (line 83) | interval xor_list_search_array(char* begin, char* end, std::size_t bytes...
function free_memory_list (line 145) | free_memory_list& free_memory_list::operator=(free_memory_list&& other) ...
function xor_link_block (line 244) | void xor_link_block(void* memory, std::size_t node_size, std::size_t no_...
type pos (line 261) | struct pos
function pos (line 268) | pos find_pos_interval(const allocator_info& info, char* memory, char* fi...
function pos (line 300) | pos find_pos(const allocator_info& info, char* memory, char* begin_node,...
FILE: trtlab/memory/src/detail/free_list_utils.h
function namespace (line 17) | namespace trtlab
FILE: trtlab/memory/src/detail/page_info.c
type flag (line 59) | typedef struct {
function get_page_size (line 119) | static unsigned get_page_size() {
function page_info (line 133) | page_info extract_info(uint64_t bits) {
function fprint_info (line 145) | void fprint_info(FILE* f, page_info info) {
function print_info (line 161) | void print_info(page_info info) {
function flag_count (line 165) | flag_count get_flag_count(page_info_array infos, int flag_num) {
function fprint_info_header (line 192) | void fprint_info_header(FILE *file) {
function fprint_info_row (line 199) | void fprint_info_row(FILE *file, page_info info) {
function fprint_ratios_noheader (line 219) | void fprint_ratios_noheader(FILE *file, page_info_array infos) {
function fprint_ratios (line 251) | void fprint_ratios(FILE *file, page_info_array infos) {
function fprint_table (line 260) | void fprint_table(FILE *f, page_info_array infos) {
function page_info (line 273) | page_info get_page_info(void *p) {
function page_info_array (line 285) | page_info_array get_info_for_range(void *start, void *end) {
function free_info_array (line 353) | void free_info_array(page_info_array infos) {
function flag_from_name (line 357) | int flag_from_name(char const *name) {
FILE: trtlab/memory/src/error.cc
function default_out_of_memory_handler (line 16) | void default_out_of_memory_handler(const allocator_info& info, std::size...
function default_bad_alloc_size_handler (line 56) | void default_bad_alloc_size_handler(const allocator_info& info, std::siz...
FILE: trtlab/memory/src/ilog2.h
function namespace (line 17) | namespace trtlab
FILE: trtlab/memory/src/trackers.cc
type size_tracker::impl (line 8) | struct size_tracker::impl
FILE: trtlab/memory/src/utils.cc
type trtlab (line 37) | namespace trtlab
type memory (line 39) | namespace memory
function bytes_to_string (line 41) | std::string bytes_to_string(size_t bytes)
function string_to_bytes (line 56) | std::uint64_t string_to_bytes(const std::string str)
FILE: trtlab/memory/tests/test_main.cc
function main (line 31) | int main(int argc, char **argv) {
FILE: trtlab/memory/tests/test_memory.cc
class TestMemory (line 47) | class TestMemory : public ::testing::Test
type trtlab (line 53) | namespace trtlab
type memory (line 55) | namespace memory
function make_vector (line 63) | auto make_vector(RawAllocator&& alloc)
function make_vector (line 69) | auto make_vector(allocator<RawAllocator, Mutex> alloc)
type log_tracker (line 74) | struct log_tracker
method on_node_allocation (line 76) | void on_node_allocation(void* ptr, std::size_t size, std::size_t align...
method on_node_deallocation (line 81) | void on_node_deallocation(void* ptr, std::size_t size, std::size_t ali...
method on_array_allocation (line 86) | void on_array_allocation(void* ptr, std::size_t count, std::size_t siz...
method on_array_deallocation (line 91) | void on_array_deallocation(void* ptr, std::size_t count, std::size_t s...
type empty_tracker (line 99) | struct empty_tracker
method on_node_allocation (line 101) | void on_node_allocation(void* ptr, std::size_t size, std::size_t align...
method on_node_deallocation (line 106) | void on_node_deallocation(void* ptr, std::size_t size, std::size_t ali...
method on_array_allocation (line 111) | void on_array_allocation(void* ptr, std::size_t count, std::size_t siz...
method on_array_deallocation (line 116) | void on_array_deallocation(void* ptr, std::size_t count, std::size_t s...
type counting_tracker (line 122) | struct counting_tracker
method counting_tracker (line 124) | counting_tracker(std::string n) : name(n), m_node_count(0), m_node_byt...
method counting_tracker (line 127) | counting_tracker(const counting_tracker&) = default;
method counting_tracker (line 128) | counting_tracker& operator=(const counting_tracker&) = default;
method counting_tracker (line 130) | counting_tracker(counting_tracker&&) noexcept = default;
method counting_tracker (line 131) | counting_tracker& operator=(counting_tracker&&) noexcept = default;
method on_node_allocation (line 133) | void on_node_allocation(void* ptr, std::size_t size, std::size_t align...
method on_node_deallocation (line 141) | void on_node_deallocation(void* ptr, std::size_t size, std::size_t ali...
method on_array_allocation (line 149) | void on_array_allocation(void* ptr, std::size_t count, std::size_t siz...
method on_array_deallocation (line 157) | void on_array_deallocation(void* ptr, std::size_t count, std::size_t s...
method count (line 165) | std::size_t count() const noexcept
method bytes (line 170) | std::size_t bytes() const noexcept
method node_count (line 175) | std::size_t node_count() const noexcept
method node_bytes (line 179) | std::size_t node_bytes() const noexcept
method array_count (line 183) | std::size_t array_count() const noexcept
method array_bytes (line 187) | std::size_t array_bytes() const noexcept
function test_alloc_x10 (line 201) | static void test_alloc_x10(Allocator& alloc)
type TestRawMalloc (line 226) | struct TestRawMalloc
method deallocate_node (line 236) | static void deallocate_node(void* ptr, std::size_t, std::size_t) noexcept
function TEST_F (line 242) | TEST_F(TestMemory, BasicTraits)
function TEST_F (line 255) | TEST_F(TestMemory, ReferenceStorage)
function TEST_F (line 269) | TEST_F(TestMemory, VectorWithTracking)
function TEST_F (line 307) | TEST_F(TestMemory, SingleBlockAllocatorFromType)
function TEST_F (line 316) | TEST_F(TestMemory, SingleBlockAllocatorFromObj)
function TEST_F (line 330) | TEST_F(TestMemory, FixedSizedBlockAllocatorFromType)
function TEST_F (line 338) | TEST_F(TestMemory, FixedSizedBlockAllocatorFromObj)
function TEST_F (line 352) | TEST_F(TestMemory, GrowingdBlockAllocatorFromType)
function TEST_F (line 360) | TEST_F(TestMemory, GrowingdBlockAllocatorFromObj)
function TEST_F (line 375) | TEST_F(TestMemory, CountLimitedFixedSizeBlockAllocatorFromType)
function TEST_F (line 383) | TEST_F(TestMemory, CountLimitedFixedSizeBlockAllocatorFromObj)
function TEST_F (line 400) | TEST_F(TestMemory, SizeLimitedFixedSizeBlockAllocatorFromObj)
function TEST_F (line 417) | TEST_F(TestMemory, SmartPtrsWithStatelessRawAllocator)
function TEST_F (line 437) | TEST_F(TestMemory, SmartPtrsWithStatefulRawAllocator)
function TEST_F (line 457) | TEST_F(TestMemory, MemoryDescriptors)
function TEST_F (line 488) | TEST_F(TestMemory, AllocatorTraits)
function TEST_F (line 499) | TEST_F(TestMemory, CachedBlockArena)
function TEST_F (line 545) | TEST_F(TestMemory, DetailStack)
function TEST_F (line 550) | TEST_F(TestMemory, TransactionalAllocator)
function TEST_F (line 607) | TEST_F(TestMemory, TransparentHugePages)
function is_equal (line 641) | bool is_equal(void* a, void* b)
function TEST_F (line 646) | TEST_F(TestMemory, TestFreeList)
function TEST_F (line 688) | TEST_F(TestMemory, MemoryArenaUncached)
function TEST_F (line 731) | TEST_F(TestMemory, MemoryPool)
function TEST_F (line 758) | TEST_F(TestMemory, IsMemoryType)
function TEST_F (line 815) | TEST_F(TestMemory, HostMemory)
function TEST_F (line 861) | TEST_F(TestMemory, FirstTouchMallocAllocator)
function TEST_F (line 896) | TEST_F(TestMemory, FindRanges0)
function TEST_F (line 905) | TEST_F(TestMemory, FindRanges1)
function TEST_F (line 914) | TEST_F(TestMemory, FindRanges2)
function TEST_F (line 923) | TEST_F(TestMemory, FindRanges3)
function TEST_F (line 932) | TEST_F(TestMemory, FindRanges4)
function TEST_F (line 941) | TEST_F(TestMemory, FindRanges5)
function make_map (line 951) | auto make_map(BlockAllocator&& block_alloc)
function TEST_F (line 965) | TEST_F(TestMemory, MapWithCustomAllocator)
function TEST_F (line 997) | TEST_F(TestMemory, MapWithTracedMalloc)
function equiv_ptr (line 1019) | bool equiv_ptr(void* lhs, void* rhs)
function TEST_F (line 1024) | TEST_F(TestMemory, RBTree_Set)
function TEST_F (line 1070) | TEST_F(TestMemory, bfit)
function TEST_F (line 1091) | TEST_F(TestMemory, histogram)
function TEST_F (line 1134) | TEST_F(TestMemory, TrackHighLevelAllocator)
function TEST_F (line 1186) | TEST_F(TestMemory, IAllocator)
FILE: trtlab/memory/tools/node_size_debugger.cpp
type simple_serializer (line 17) | struct simple_serializer
method prefix (line 21) | void prefix() const {}
method suffix (line 30) | void suffix() const {}
type verbose_serializer (line 33) | struct verbose_serializer
method prefix (line 37) | void prefix() const {}
method suffix (line 47) | void suffix() const {}
type code_serializer (line 50) | struct code_serializer
method prefix (line 56) | void prefix() const
method suffix (line 99) | void suffix() const
method tab (line 104) | std::string tab() const
method struct_name (line 111) | std::string struct_name(const char* container_name) const
function serialize_single (line 124) | void serialize_single(const Serializer& serializer)
function serialize_impl (line 132) | int serialize_impl(const Serializer& serializer)
function serialize_impl (line 139) | void serialize_impl(const Serializer& serializer, std::tuple<Debuggers...>)
function serialize (line 146) | void serialize(const Serializer& serializer)
function print_help (line 153) | void print_help(std::ostream& out)
function print_version (line 179) | void print_version(std::ostream& out)
function print_invalid_option (line 184) | int print_invalid_option(std::ostream& out, const char* option)
function print_invalid_argument (line 194) | int print_invalid_argument(std::ostream& out, const char* option)
function main (line 201) | int main(int argc, char* argv[])
FILE: trtlab/memory/tools/node_size_debugger.hpp
type node_size_storage (line 24) | struct node_size_storage
type empty_payload (line 32) | struct empty_payload
class node_size_debugger (line 43) | class node_size_debugger : public std::allocator<T>, private AdditionalP...
type rebind (line 47) | struct rebind
method node_size_debugger (line 52) | node_size_debugger()
method node_size_debugger (line 59) | node_size_debugger(node_size_debugger<U, TestType, Debugger, Additiona...
type hash (line 75) | struct hash
type debug_forward_list (line 86) | struct debug_forward_list
method debug (line 94) | std::size_t debug()
type debug_list (line 104) | struct debug_list
method debug (line 112) | std::size_t debug()
type debug_set (line 122) | struct debug_set
method debug (line 130) | std::size_t debug()
type debug_multiset (line 140) | struct debug_multiset
method debug (line 148) | std::size_t debug()
type debug_unordered_set (line 158) | struct debug_unordered_set
method debug (line 166) | std::size_t debug()
type debug_unordered_multiset (line 177) | struct debug_unordered_multiset
method debug (line 185) | std::size_t debug()
type debug_map (line 197) | struct debug_map
method debug (line 205) | std::size_t debug()
type debug_multimap (line 216) | struct debug_multimap
method debug (line 224) | std::size_t debug()
type debug_unordered_map (line 235) | struct debug_unordered_map
method debug (line 243) | std::size_t debug()
type debug_unordered_multimap (line 256) | struct debug_unordered_multimap
method debug (line 264) | std::size_t debug()
type debug_shared_ptr_stateless (line 277) | struct debug_shared_ptr_stateless
method debug (line 285) | std::size_t debug()
type debug_shared_ptr_stateful (line 299) | struct debug_shared_ptr_stateful
method debug (line 307) | std::size_t debug()
type debug_shared_ptr_stateful_mutex (line 322) | struct debug_shared_ptr_stateful_mutex
method debug (line 330) | std::size_t debug()
function debug_single (line 348) | std::size_t debug_single(Debugger debugger)
type debug_result (line 361) | struct debug_result
function debug_impl (line 368) | node_size_map debug_impl(Debugger debugger, std::tuple<Types...>)
function debug (line 377) | debug_result debug(Debugger debugger)
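node_size_debugger.hpp lists an allocator-derived node_size_debugger plus one debug_* struct per standard container, each returning a node size from debug(). The underlying trick is an allocator that records the byte count a container requests when it allocates a single node; node overhead is then that count minus sizeof(value_type). The sketch below shows the idea only; node_size_recorder and its members are illustrative, not the header's actual types.

#include <cstddef>
#include <forward_list>
#include <iostream>
#include <memory>

// Allocator wrapper that remembers the size requested for a one-element allocation.
template <typename T>
struct node_size_recorder : std::allocator<T>
{
    std::size_t* recorded;   // shared slot written when a single node is allocated

    explicit node_size_recorder(std::size_t* out) : recorded(out) {}

    template <typename U>
    node_size_recorder(const node_size_recorder<U>& other) : recorded(other.recorded) {}

    template <typename U>
    struct rebind { using other = node_size_recorder<U>; };

    T* allocate(std::size_t n)
    {
        if (n == 1)
            *recorded = sizeof(T);   // T is the container's internal node type here
        return std::allocator<T>{}.allocate(n);
    }
    void deallocate(T* p, std::size_t n) { std::allocator<T>{}.deallocate(p, n); }

    template <typename U>
    bool operator==(const node_size_recorder<U>&) const { return true; }
    template <typename U>
    bool operator!=(const node_size_recorder<U>&) const { return false; }
};

int main()
{
    std::size_t node_bytes = 0;
    node_size_recorder<int> alloc(&node_bytes);
    std::forward_list<int, node_size_recorder<int>> list(alloc);
    list.push_front(42);   // forces exactly one node allocation
    std::cout << "forward_list node: " << node_bytes << " bytes, overhead: "
              << node_bytes - sizeof(int) << " bytes\n";
    return 0;
}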
FILE: trtlab/memory/tools/test_types.hpp
type detail (line 18) | namespace detail
type M1 (line 41) | struct M1
type M2 (line 43) | struct M2
type M3 (line 45) | struct M3
type M4 (line 47) | struct M4
type M5 (line 49) | struct M5
type M6 (line 51) | struct M6
type M1<> (line 82) | struct M1<>
type M2 (line 93) | struct M2 : W
type M3 (line 100) | struct M3
type M5 (line 120) | struct M5
type M6 (line 154) | struct M6 : M6<S, A * 2, B, Z..., A>
type M1<X, T...> (line 65) | struct M1<X, T...> : M1<T...>
type M4<S, W<A, X...>, W<Y...>> (line 108) | struct M4<S, W<A, X...>, W<Y...>>
type M4<S, W<>, W<Y...>> (line 114) | struct M4<S, W<>, W<Y...>>
type M5<W<A...>, W<X, B...>> (line 129) | struct M5<W<A...>, W<X, B...>> : M5<W<A..., X>, W<B...>>
type M5<W<A...>, W<M1<>, B...>> (line 134) | struct M5<W<A...>, W<M1<>, B...>> : M5<W<A...>, W<B...>>
type M5<W<A...>, W<>> (line 139) | struct M5<W<A...>, W<>> : M5<M1<A...>>
type M6<S, A, A, Z...> (line 159) | struct M6<S, A, A, Z...>
FILE: trtlab/nvrpc/include/nvrpc/client/base_context.h
function namespace (line 29) | namespace nvrpc {
FILE: trtlab/nvrpc/include/nvrpc/client/client_single_up_multiple_down.h
function namespace (line 39) | namespace nvrpc {
function CallbackOnResponseReceived (line 200) | void CallbackOnResponseReceived(Response&& response) final override
function CallbackOnComplete (line 205) | void CallbackOnComplete(const ::grpc::Status& status) final override
FILE: trtlab/nvrpc/include/nvrpc/client/client_streaming.h
function namespace (line 39) | namespace nvrpc {
FILE: trtlab/nvrpc/include/nvrpc/client/client_streaming_v2.h
function namespace (line 39) | namespace nvrpc
FILE: trtlab/nvrpc/include/nvrpc/client/client_streaming_v3.h
function namespace (line 39) | namespace nvrpc
FILE: trtlab/nvrpc/include/nvrpc/client/client_unary.h
function namespace (line 38) | namespace nvrpc {
FILE: trtlab/nvrpc/include/nvrpc/client/client_unary_v2.h
function namespace (line 39) | namespace nvrpc
FILE: trtlab/nvrpc/include/nvrpc/client/executor.h
function namespace (line 36) | namespace nvrpc {
FILE: trtlab/nvrpc/include/nvrpc/context.h
function namespace (line 39) | namespace nvrpc {
FILE: trtlab/nvrpc/include/nvrpc/executor.h
function Initialize (line 47) | void Initialize(::grpc::ServerBuilder& builder) final override
function RegisterContexts (line 55) | void RegisterContexts(IRPC* rpc, std::shared_ptr<::trtlab::Resources> re...
FILE: trtlab/nvrpc/include/nvrpc/fiber/executor.h
function namespace (line 32) | namespace nvrpc
FILE: trtlab/nvrpc/include/nvrpc/interfaces.h
function namespace (line 34) | namespace nvrpc {
FILE: trtlab/nvrpc/include/nvrpc/life_cycle_batching.h
function namespace (line 32) | namespace nvrpc {
function ~LifeCycleBatching (line 87) | virtual ~LifeCycleBatching() override {}
function SetQueueFunc (line 91) | void SetQueueFunc(ExecutorQueueFuncType q_fn) { m_QueuingFunc = q_fn; }
function OnRequestReceived (line 94) | virtual void OnRequestReceived(const RequestType&) {}
function Reset (line 102) | void Reset() final override;
FILE: trtlab/nvrpc/include/nvrpc/life_cycle_bidirectional.h
function namespace (line 35) | namespace nvrpc {
FILE: trtlab/nvrpc/include/nvrpc/life_cycle_streaming.h
function namespace (line 34) | namespace nvrpc
FILE: trtlab/nvrpc/include/nvrpc/life_cycle_unary.h
function namespace (line 30) | namespace nvrpc {
FILE: trtlab/nvrpc/include/nvrpc/rpc.h
function namespace (line 33) | namespace nvrpc {
FILE: trtlab/nvrpc/include/nvrpc/server.h
function namespace (line 36) | namespace nvrpc {
FILE: trtlab/nvrpc/include/nvrpc/service.h
function namespace (line 32) | namespace nvrpc {
FILE: trtlab/nvrpc/src/client/client_executor.cc
type nvrpc (line 34) | namespace nvrpc {
type client (line 35) | namespace client {
FILE: trtlab/nvrpc/src/executor.cc
type nvrpc (line 36) | namespace nvrpc {
FILE: trtlab/nvrpc/src/server.cc
function signal_handler (line 36) | void signal_handler(int signal) { shutdown_handler(signal); }
type nvrpc (line 39) | namespace nvrpc {
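The signal_handler entry in server.cc is the usual trampoline from a C-style signal handler into a std::function, so that a capturing lambda (for example one holding the server) can react to SIGINT. A self-contained sketch of the idiom follows; the Server type is omitted and the lambda body is illustrative. Calling std::function/iostream from a signal handler is not strictly async-signal-safe, but it is a common pattern for test and example servers.

#include <csignal>
#include <functional>
#include <iostream>

std::function<void(int)> shutdown_handler;               // set by main before signals arrive

// Plain function with the signature std::signal expects; it forwards into the
// std::function so a capturing lambda can do the actual shutdown work.
void signal_handler(int signal) { shutdown_handler(signal); }

int main()
{
    bool running = true;
    shutdown_handler = [&running](int sig) {
        std::cout << "caught signal " << sig << ", shutting down\n";
        running = false;                                  // a real server would call Shutdown() here
    };
    std::signal(SIGINT, signal_handler);                  // install the trampoline
    // ... a real server would block here until `running` flips ...
    return 0;
}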
FILE: trtlab/nvrpc/tests/test_build_client.h
function namespace (line 40) | namespace nvrpc {
FILE: trtlab/nvrpc/tests/test_build_server.h
function namespace (line 37) | namespace nvrpc {
FILE: trtlab/nvrpc/tests/test_pingpong.cc
class PingPongTest (line 165) | class PingPongTest : public ::testing::Test
method SetUp (line 167) | void SetUp() override {}
method TearDown (line 169) | void TearDown() override
function TEST_F (line 182) | TEST_F(PingPongTest, UnaryTest)
function TEST_F (line 230) | TEST_F(PingPongTest, FibersUnaryTest)
function TEST_F (line 300) | TEST_F(PingPongTest, StreamingTest)
function TEST_F (line 344) | TEST_F(PingPongTest, FibersStreamingTest)
function TEST_F (line 411) | TEST_F(PingPongTest, ServerEarlyFinish)
function TEST_F (line 454) | TEST_F(PingPongTest, ServerEarlyCancel)
FILE: trtlab/nvrpc/tests/test_pingpong.h
function namespace (line 36) | namespace nvrpc {
FILE: trtlab/nvrpc/tests/test_resources.cc
type nvrpc (line 29) | namespace nvrpc {
type testing (line 30) | namespace testing {
FILE: trtlab/nvrpc/tests/test_resources.h
function namespace (line 37) | namespace nvrpc {
FILE: trtlab/nvrpc/tests/test_server.cc
class ServerTest (line 36) | class ServerTest : public ::testing::Test
method SetUp (line 38) | void SetUp() override
method TearDown (line 44) | void TearDown() override
function TEST_F (line 59) | TEST_F(ServerTest, AsyncStartAndShutdown)
function TEST_F (line 68) | TEST_F(ServerTest, RunAndShutdown)
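test_server.cc follows the standard GoogleTest fixture shape: SetUp/TearDown bracket each TEST_F, with one test exercising an asynchronous start and one a blocking run. The sketch below only reproduces that shape with a stand-in FakeServer; the real tests drive nvrpc::Server, whose actual start/shutdown API is not reproduced here. Link against gtest_main to run it.

#include <gtest/gtest.h>
#include <memory>

// Stand-in server so the fixture compiles on its own; not nvrpc's Server class.
struct FakeServer
{
    void AsyncStart() { running = true; }
    void Shutdown()   { running = false; }
    bool running = false;
};

class ServerTest : public ::testing::Test
{
  protected:
    void SetUp() override    { m_Server = std::make_unique<FakeServer>(); }
    void TearDown() override { if (m_Server) m_Server->Shutdown(); }

    std::unique_ptr<FakeServer> m_Server;
};

TEST_F(ServerTest, AsyncStartAndShutdown)
{
    m_Server->AsyncStart();
    EXPECT_TRUE(m_Server->running);
    m_Server->Shutdown();
    EXPECT_FALSE(m_Server->running);
}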
FILE: trtlab/pybind/trtlab/infer.cc
class TrtisModel (line 79) | class TrtisModel
method TrtisModel (line 216) | TrtisModel(const ::trtis::ModelConfig& model)
method GetMaxBatchSize (line 265) | int GetMaxBatchSize() const final override { return m_MaxBatchSize; }
class PyInferRunner (line 80) | class PyInferRunner
method Infer (line 412) | auto Infer(py::kwargs kwargs)
method InputBindings (line 515) | py::dict InputBindings() const
method OutputBindings (line 525) | py::dict OutputBindings() const
method AddBindingInfo (line 536) | void AddBindingInfo(py::dict& dict, int id) const
class PyInferRemoteRunner (line 81) | class PyInferRemoteRunner
method PyInferRemoteRunner (line 273) | PyInferRemoteRunner(
method GetModel (line 284) | const BaseModel& GetModel() const { return *m_Model; }
method Infer (line 286) | InferFuture Infer(py::kwargs kwargs)
method InputBindings (line 348) | py::dict InputBindings() const
method OutputBindings (line 358) | py::dict OutputBindings() const
method ConvertResponseToNumpy (line 369) | py::dict ConvertResponseToNumpy(const ::trtis::InferResponse& response)
method AddBindingInfo (line 390) | void AddBindingInfo(py::dict& dict, int id) const
class PyInferenceManager (line 83) | class PyInferenceManager final : public InferenceManager
method PyInferenceManager (line 86) | PyInferenceManager(int max_executions, int max_buffers, int pre_thread...
method RegisterModelByPath (line 100) | std::shared_ptr<PyInferRunner> RegisterModelByPath(const std::string& ...
method InferRunner (line 108) | std::shared_ptr<PyInferRunner> InferRunner(std::string name)
method Serve (line 114) | void Serve(int port) { BasicInferService(casted_shared_from_this<PyInf...
method Models (line 116) | std::vector<std::string> Models()
class PyRemoteInferenceManager (line 124) | class PyRemoteInferenceManager
method PyRemoteInferenceManager (line 127) | PyRemoteInferenceManager(py::kwargs kwargs)
method Models (line 148) | std::vector<std::string> Models()
method InferRunner (line 172) | std::shared_ptr<PyInferRemoteRunner> InferRunner(const std::string& mo...
method GetModel (line 188) | std::shared_ptr<TrtisModel> GetModel(const std::string& name) const
method TrtisStatus (line 196) | ::trtis::StatusResponse TrtisStatus()
type TrtisModel (line 214) | struct TrtisModel : BaseModel
method TrtisModel (line 216) | TrtisModel(const ::trtis::ModelConfig& model)
method GetMaxBatchSize (line 265) | int GetMaxBatchSize() const final override { return m_MaxBatchSize; }
type PyInferRemoteRunner (line 271) | struct PyInferRemoteRunner
method PyInferRemoteRunner (line 273) | PyInferRemoteRunner(
method GetModel (line 284) | const BaseModel& GetModel() const { return *m_Model; }
method Infer (line 286) | InferFuture Infer(py::kwargs kwargs)
method InputBindings (line 348) | py::dict InputBindings() const
method OutputBindings (line 358) | py::dict OutputBindings() const
method ConvertResponseToNumpy (line 369) | py::dict ConvertResponseToNumpy(const ::trtis::InferResponse& response)
method AddBindingInfo (line 390) | void AddBindingInfo(py::dict& dict, int id) const
type PyInferRunner (line 406) | struct PyInferRunner : public InferRunner
method Infer (line 412) | auto Infer(py::kwargs kwargs)
method InputBindings (line 515) | py::dict InputBindings() const
method OutputBindings (line 525) | py::dict OutputBindings() const
method AddBindingInfo (line 536) | void AddBindingInfo(py::dict& dict, int id) const
class StatusContext (line 547) | class StatusContext final
method ExecuteRPC (line 550) | void ExecuteRPC(::trtis::StatusRequest& request,
class InferContext (line 596) | class InferContext final
method ExecuteRPC (line 599) | void ExecuteRPC(RequestType& input, ResponseType& output) final override
function BasicInferService (line 644) | void BasicInferService(std::shared_ptr<InferenceManager> resources, int ...
function PYBIND11_MODULE (line 683) | PYBIND11_MODULE(trtlab, m)
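infer.cc exposes the inference classes to Python with pybind11: PYBIND11_MODULE(trtlab, m) binds an InferenceManager-style factory plus runners whose Infer(py::kwargs) accepts named inputs and whose *Bindings() methods return dicts. The sketch below shows only that binding layout; DemoRunner, DemoManager, the module name demo_trtlab, and the Python-side method names are illustrative, not the module's real API.

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <string>
#include <vector>

namespace py = pybind11;

// Greatly simplified stand-in for PyInferRunner: accepts named inputs as kwargs
// and returns a dict (the real runner returns futures of numpy result dicts).
struct DemoRunner
{
    py::dict Infer(py::kwargs kwargs)
    {
        py::dict results;
        for (auto item : kwargs)                 // echo each named input back
            results[item.first] = item.second;
        return results;
    }
    py::dict InputBindings() const { return py::dict(); }
};

// Stand-in for the manager that hands out runners by model name.
struct DemoManager
{
    DemoRunner InferRunner(const std::string& /*model_name*/) { return DemoRunner{}; }
    std::vector<std::string> Models() const { return {"model_a"}; }
};

// Module layout analogous to PYBIND11_MODULE(trtlab, m) in infer.cc.
PYBIND11_MODULE(demo_trtlab, m)
{
    py::class_<DemoRunner>(m, "InferRunner")
        .def("infer", &DemoRunner::Infer)
        .def("input_bindings", &DemoRunner::InputBindings);

    py::class_<DemoManager>(m, "InferenceManager")
        .def(py::init<>())
        .def("infer_runner", &DemoManager::InferRunner)
        .def("models", &DemoManager::Models);
}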
FILE: trtlab/pybind/trtlab/utils.cc
type trtlab (line 33) | namespace trtlab {
type TensorRT (line 34) | namespace TensorRT {
function DataTypeToNumpy (line 36) | py::dtype DataTypeToNumpy(::nvinfer1::DataType dtype)
FILE: trtlab/pybind/trtlab/utils.h
function namespace (line 32) | namespace trtlab {
FILE: trtlab/tensorrt/include/trtlab/tensorrt/allocator.h
function namespace (line 35) | namespace trtlab
FILE: trtlab/tensorrt/include/trtlab/tensorrt/bindings.h
function namespace (line 41) | namespace trtlab {
FILE: trtlab/tensorrt/include/trtlab/tensorrt/buffers.h
function namespace (line 40) | namespace trtlab {
FILE: trtlab/tensorrt/include/trtlab/tensorrt/common.h
function namespace (line 34) | namespace trtlab
FILE: trtlab/tensorrt/include/trtlab/tensorrt/execution_context.h
function namespace (line 10) | namespace trtlab
FILE: trtlab/tensorrt/include/trtlab/tensorrt/infer_bench.h
function namespace (line 32) | namespace trtlab {
FILE: trtlab/tensorrt/include/trtlab/tensorrt/infer_runner.h
function namespace (line 34) | namespace trtlab {
function InitializeBindings (line 106) | BindingsHandle InitializeBindings()
FILE: trtlab/tensorrt/include/trtlab/tensorrt/inference_manager.h
function namespace (line 6) | namespace trtlab
FILE: trtlab/tensorrt/include/trtlab/tensorrt/model.h
function namespace (line 9) | namespace trtlab
FILE: trtlab/tensorrt/include/trtlab/tensorrt/runtime.h
function namespace (line 39) | namespace trtlab
FILE: trtlab/tensorrt/include/trtlab/tensorrt/utils.h
function namespace (line 32) | namespace trtlab {
FILE: trtlab/tensorrt/include/trtlab/tensorrt/workspace.h
function namespace (line 14) | namespace trtlab
FILE: trtlab/tensorrt/src/bindings.cc
type trtlab (line 52) | namespace trtlab {
type TensorRT (line 53) | namespace TensorRT {
FILE: trtlab/tensorrt/src/buffers.cc
type trtlab (line 39) | namespace trtlab {
type TensorRT (line 40) | namespace TensorRT {
FILE: trtlab/tensorrt/src/infer_bench.cc
type trtlab (line 33) | namespace trtlab {
type TensorRT (line 34) | namespace TensorRT {
FILE: trtlab/tensorrt/src/inference_manager.cc
type trtlab (line 38) | namespace trtlab {
type TensorRT (line 39) | namespace TensorRT {
function ActiveRuntime (line 158) | Runtime& InferenceManager::ActiveRuntime() { return *m_ActiveRuntime; }
FILE: trtlab/tensorrt/src/model.cc
function ProfileSelectorString (line 11) | std::string ProfileSelectorString(nvinfer1::OptProfileSelector selector)
FILE: trtlab/tensorrt/src/runtime.cc
function file_exists (line 40) | bool file_exists(const std::string& name)
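file_exists() in runtime.cc is a small guard used before deserializing an engine file. One common way such a check is written is shown below; the actual body in runtime.cc may differ.

#include <fstream>
#include <string>

// Returns true if the file can be opened for reading.
bool file_exists(const std::string& name)
{
    std::ifstream f(name);
    return f.good();
}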
FILE: trtlab/tensorrt/src/utils.cc
type trtlab (line 33) | namespace trtlab {
type TensorRT (line 34) | namespace TensorRT {
function SizeofDataType (line 36) | std::size_t SizeofDataType(const nvinfer1::DataType& dtype)
function data_type_size (line 52) | std::size_t data_type_size(const nvinfer1::DataType& dtype)
function dims_element_count (line 57) | std::size_t dims_element_count(const nvinfer1::Dims& dims)
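utils.cc provides element-size and element-count helpers over TensorRT types. The likely shape is a switch over nvinfer1::DataType plus a product over Dims::d, as sketched below; the enumerator coverage matches the TensorRT 5/6-era API and the real functions may handle additional cases.

#include <cstddef>
#include <NvInfer.h>

// Bytes per element for a TensorRT data type (unknown types map to 0 here).
std::size_t data_type_size(const nvinfer1::DataType& dtype)
{
    switch (dtype)
    {
        case nvinfer1::DataType::kFLOAT: return 4;
        case nvinfer1::DataType::kHALF:  return 2;
        case nvinfer1::DataType::kINT8:  return 1;
        case nvinfer1::DataType::kINT32: return 4;
        default:                         return 0;
    }
}

// Total number of elements described by a Dims struct.
std::size_t dims_element_count(const nvinfer1::Dims& dims)
{
    std::size_t count = 1;
    for (int i = 0; i < dims.nbDims; ++i)
        count *= static_cast<std::size_t>(dims.d[i]);
    return count;
}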
FILE: trtlab/tensorrt/tests/test_buffers.cc
class TestCyclicBuffers (line 63) | class TestCyclicBuffers : public ::testing::Test
function TEST_F (line 67) | TEST_F(TestCyclicBuffers, CyclicBuffers)
Condensed preview — 406 files, each showing path, character count, and a content snippet (full structured content: 2,402K chars).
[
{
"path": ".bazelrc",
"chars": 135,
"preview": "build --cxxopt=-std=c++1z\nbuild --incompatible_remove_native_http_archive=false \nbuild --incompatible_package_name_is_a_"
},
{
"path": ".clang-format",
"chars": 2646,
"preview": "#BasedOnStyle: Google\n\nLanguage: Cpp\n# BasedOnStyle: LLVM\nAccessModifierOffset: -2\nAlignAfterOpenBracket: Align\n"
},
{
"path": ".dockerignore",
"chars": 49,
"preview": "build\n*.engine\nmodels\n@eaDir\n__pycache__\nbazel-*\n"
},
{
"path": ".gitmodules",
"chars": 972,
"preview": "[submodule \"third_party/cpuaff\"]\n\tpath = third_party/cpuaff\n\turl = https://github.com/dcdillon/cpuaff\n[submodule \"third_"
},
{
"path": "BUILD.bazel",
"chars": 54,
"preview": "package(default_visibility = [\"//visibility:public\"])\n"
},
{
"path": "CLA",
"chars": 7968,
"preview": " The NVIDIA TensorRT Laboratory\n Software Grant and Corporate Contributor License Agreement (\"Ag"
},
{
"path": "CMakeLists.txt",
"chars": 2515,
"preview": "# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "CREDITS.md",
"chars": 8976,
"preview": "`ThreadPool` class was derived from https://github.com/progschj/ThreadPool\n\n> Copyright (c) 2012 Jakob Progsch, Václav Z"
},
{
"path": "Dockerfile",
"chars": 1502,
"preview": "# stage 1 - development container\n# holds the core nvidia libraries but does not container the project source code\n# use"
},
{
"path": "LICENSE",
"chars": 1523,
"preview": "BSD 3-Clause License\n\nCopyright (c) 2018-2019, NVIDIA Corporation\nAll rights reserved.\n\nRedistribution and use in source"
},
{
"path": "README.md",
"chars": 2077,
"preview": "# TensorRT Laboratory\n\nThe TensorRT Laboratory (trtlab) is a general purpose set of tools to build customer inference ap"
},
{
"path": "WORKSPACE",
"chars": 398,
"preview": "workspace(name = \"com_github_nvidia_trtlab\")\n\nload(\":bazel/repositories.bzl\", \"repositories\")\nrepositories()\n\nload (\"//b"
},
{
"path": "bazel/BUILD.bazel",
"chars": 80,
"preview": "exports_files(\n glob([\"*.bzl\"]),\n visibility = [\"//visibility:public\"],\n)\n"
},
{
"path": "bazel/cuda_configure.bzl",
"chars": 2528,
"preview": "\"\"\"Build rule generator for locally installed CUDA toolkit and cuDNN SDK.\"\"\"\n\n# src: https://github.com/google/nvidia_li"
},
{
"path": "bazel/repositories.bzl",
"chars": 3023,
"preview": "load(\"@bazel_tools//tools/build_defs/repo:http.bzl\", \"http_archive\")\n\ndef repositories():\n _maybe(\n http_archi"
},
{
"path": "bazel/tensorrt_configure.bzl",
"chars": 1406,
"preview": "\"\"\"Build rule generator for locally installed TensorRT.\"\"\"\n\n# inspired from: https://github.com/google/nvidia_libs_test\n"
},
{
"path": "build.sh",
"chars": 1785,
"preview": "#!/bin/bash\n#\n# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source a"
},
{
"path": "cmake/FindTensorRT.cmake",
"chars": 2642,
"preview": "# This module defines the following variables:\n#\n# ::\n#\n# TensorRT_INCLUDE_DIRS\n# TensorRT_LIBRARIES\n# TensorRT_FO"
},
{
"path": "cmake/Findcpuaff.cmake",
"chars": 981,
"preview": "# This module defines the following variables:\n#\n# ::\n#\n# CPUAFF_INCLUDE_DIRS\n# CPUAFF_FOUND\n#\n# ::\n#\n# Hints\n# ^^^^"
},
{
"path": "cmake/GRPCGenerateCPP.cmake",
"chars": 2741,
"preview": "\nfind_package(gRPC REQUIRED COMPONENTS grpc_cpp_plugin)\nset(_gRPC_CPP_PLUGIN_EXECUTABLE $<TARGET_FILE:gRPC::grpc_cpp_plu"
},
{
"path": "cmake/GRPCGenerateCPPLikeBazel.cmake",
"chars": 3521,
"preview": "find_program(GRPC_CPP_PLUGIN grpc_cpp_plugin) # Get full path to plugin\n\nfunction(PROTOBUF_GENERATE_GRPC_CPP_LIKE_BAZEL "
},
{
"path": "cmake/LibFindMacros.cmake",
"chars": 10701,
"preview": "# Version 2.2\n# Public Domain, originally written by Lasse Kärkkäinen <tronic>\n# Maintained at https://github.com/Tronic"
},
{
"path": "cmake/ProtobufGenerateCPPLikeBazel.cmake",
"chars": 3658,
"preview": "function(PROTOBUF_GENERATE_CPP_LIKE_BAZEL SRCS HDRS)\n cmake_parse_arguments(protobuf \"\" \"EXPORT_MACRO;DESCRIPTORS\" \"\" $"
},
{
"path": "cmake/dependencies.cmake",
"chars": 8201,
"preview": "include (ExternalProject)\n\nset (DEPENDENCIES)\nset (EXTRA_CMAKE_ARGS)\n\n# trtlab external dependencies\nlist (APPEND DEPEND"
},
{
"path": "devel.sh",
"chars": 2072,
"preview": "#!/bin/bash\n#\n# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source a"
},
{
"path": "examples/00_TensorRT/CMakeLists.txt",
"chars": 2210,
"preview": "# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "examples/00_TensorRT/README.md",
"chars": 3224,
"preview": "# Inference Example\n\nBasic CLI tool for executing TensorRT engines.\n\nProvide an engine and `inference.x` will run a simp"
},
{
"path": "examples/00_TensorRT/infer.cc",
"chars": 5836,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/00_TensorRT/inference.cc",
"chars": 9478,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/01_Basic_GRPC/CMakeLists.txt",
"chars": 1945,
"preview": "# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "examples/01_Basic_GRPC/README.md",
"chars": 228,
"preview": "Simple service to test and stress the core service and request logic.\n\nThe [`server.cc`](examples/01_Basic_GRPC/server.c"
},
{
"path": "examples/01_Basic_GRPC/src/async_client.cc",
"chars": 4033,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/01_Basic_GRPC/src/client.cpp",
"chars": 3888,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/01_Basic_GRPC/src/server.cpp",
"chars": 7856,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/02_TensorRT_GRPC/CMakeLists.txt",
"chars": 2387,
"preview": "# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "examples/02_TensorRT_GRPC/README.md",
"chars": 6873,
"preview": "# TensorRT GRPC Example\n\nThis examples extends the [TensorRT](examples/00_TensorRT) compute loop into an\nasync gRPC serv"
},
{
"path": "examples/02_TensorRT_GRPC/src/async-client.cc",
"chars": 7436,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/02_TensorRT_GRPC/src/metrics.cc",
"chars": 2406,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/02_TensorRT_GRPC/src/metrics.h",
"chars": 2045,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/02_TensorRT_GRPC/src/server.cc",
"chars": 15410,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/02_TensorRT_GRPC/src/siege.cc",
"chars": 11985,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/02_TensorRT_GRPC/src/sync-client.cc",
"chars": 5067,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/03_Batching/CMakeLists.txt",
"chars": 1862,
"preview": "# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "examples/03_Batching/README.md",
"chars": 5029,
"preview": "# Batching Service\n\nA batching service is a service that trying to collect sets of similar requests into a\ncollective ba"
},
{
"path": "examples/03_Batching/inference-batcher.cc",
"chars": 15333,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/03_Batching/launch_batching.sh",
"chars": 2181,
"preview": "#!/bin/bash -e\n#\n# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in sourc"
},
{
"path": "examples/03_Batching/simple_batching_client.py",
"chars": 618,
"preview": "import grpc\n\nimport simple_pb2\nimport simple_pb2_grpc\n\n\ndef run():\n with grpc.insecure_channel('localhost:50051') as "
},
{
"path": "examples/03_Batching/simple_pb2.py",
"chars": 3780,
"preview": "# Generated by the protocol buffer compiler. DO NOT EDIT!\n# source: simple.proto\n\nimport sys\n_b=sys.version_info[0]<3 a"
},
{
"path": "examples/03_Batching/simple_pb2_grpc.py",
"chars": 2118,
"preview": "# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!\nimport grpc\n\nimport simple_pb2 as simple__pb2\n\n\ncl"
},
{
"path": "examples/03_Batching/streaming-service.cc",
"chars": 4250,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/03_Batching/unary_client.py",
"chars": 369,
"preview": "import grpc\n\nimport simple_pb2\nimport simple_pb2_grpc\n\n\ndef run():\n with grpc.insecure_channel('localhost:50049') as "
},
{
"path": "examples/04_Middleman/CMakeLists.txt",
"chars": 1696,
"preview": "# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "examples/04_Middleman/middleman-client.cc",
"chars": 14971,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/10_Internals/CMakeLists.txt",
"chars": 1665,
"preview": "# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "examples/10_Internals/README.md",
"chars": 13336,
"preview": "# YAIS Internals\n\nThe `internals.x` program is designed to be run on a DGX-Station or DGX-1. This is mostly to highligh"
},
{
"path": "examples/10_Internals/internals.cc",
"chars": 7007,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/11_Protos/CMakeLists.txt",
"chars": 1847,
"preview": "# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "examples/11_Protos/demo/CMakeLists.txt",
"chars": 1974,
"preview": "# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "examples/11_Protos/demo/dataset.proto",
"chars": 2001,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/11_Protos/demo/inference.proto",
"chars": 2550,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/11_Protos/echo/CMakeLists.txt",
"chars": 1928,
"preview": "# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "examples/11_Protos/echo/echo.proto",
"chars": 2053,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/11_Protos/inference/CMakeLists.txt",
"chars": 2062,
"preview": "# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "examples/11_Protos/inference/api.proto",
"chars": 4225,
"preview": "// Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n//\n// Redistribution and use in source and binary f"
},
{
"path": "examples/11_Protos/inference/model_config.proto",
"chars": 6675,
"preview": "// Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n//\n// Redistribution and use in source and binary f"
},
{
"path": "examples/11_Protos/inference/nvidia_inference.proto",
"chars": 3909,
"preview": "// Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n//\n// Redistribution and use in source and binary f"
},
{
"path": "examples/11_Protos/inference/request_status.proto",
"chars": 2192,
"preview": "// Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n//\n// Redistribution and use in source and binary f"
},
{
"path": "examples/11_Protos/inference/server_status.proto",
"chars": 4866,
"preview": "// Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n//\n// Redistribution and use in source and binary f"
},
{
"path": "examples/12_ConfigGenerator/CMakeLists.txt",
"chars": 1677,
"preview": "# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "examples/12_ConfigGenerator/README.md",
"chars": 2199,
"preview": "# TensorRT Inference Server Model Store Builder\n\n- Ensure you built the project.\n- Run `./link.sh` in this directory\n\n##"
},
{
"path": "examples/12_ConfigGenerator/generator.cc",
"chars": 5315,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/12_ConfigGenerator/link.sh",
"chars": 108,
"preview": "#!/bin/bash\n\nln -s /work/build/examples/12_ConfigGenerator/config_generator.cpython-35m-x86_64-linux-gnu.so\n"
},
{
"path": "examples/12_ConfigGenerator/ms_mgmt",
"chars": 3276,
"preview": "#!/usr/bin/env python3\nimport os\nimport pathlib\nimport shutil\n\nfrom contextlib import contextmanager\n\nimport click\nimpor"
},
{
"path": "examples/12_FlatBuffers/CMakeLists.txt",
"chars": 2242,
"preview": "# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "examples/12_FlatBuffers/client.cc",
"chars": 4324,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/12_FlatBuffers/example.fbs",
"chars": 274,
"preview": "table HelloReply {\n message:string;\n}\n\ntable HelloRequest {\n name:string;\n}\n\ntable ManyHellosRequest {\n name:string;\n"
},
{
"path": "examples/12_FlatBuffers/example.grpc.fb.cc",
"chars": 6098,
"preview": "// Generated by the gRPC C++ plugin.\n// If you make any local change, they will be lost.\n// source: example\n\n#include \"e"
},
{
"path": "examples/12_FlatBuffers/example.grpc.fb.h",
"chars": 18758,
"preview": "// Generated by the gRPC C++ plugin.\n// If you make any local change, they will be lost.\n// source: example\n#ifndef GRPC"
},
{
"path": "examples/12_FlatBuffers/example_generated.h",
"chars": 5670,
"preview": "// automatically generated by the FlatBuffers compiler, do not modify\n\n#ifndef FLATBUFFERS_GENERATED_EXAMPLE_H_\n#define "
},
{
"path": "examples/12_FlatBuffers/server.cc",
"chars": 4488,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/30_PyTensorRT/README.md",
"chars": 29,
"preview": "# Python Inference Example\n\n\n"
},
{
"path": "examples/30_PyTensorRT/client.py",
"chars": 1262,
"preview": "#!/usr/bin/env python3\nimport os\nimport time\n\nimport numpy as np\n\nimport infer\nimport infer_test_utils as utils\n\n\ndef ma"
},
{
"path": "examples/30_PyTensorRT/compute.py",
"chars": 1480,
"preview": "#!/usr/bin/env python3\nimport itertools\nimport os\nimport time\n\nimport numpy as np\n\nimport trtlab\nimport infer_test_utils"
},
{
"path": "examples/30_PyTensorRT/infer_test_utils.py",
"chars": 1352,
"preview": "#!/usr/bin/env python3\nimport glob\nimport os\n\nimport onnx\nfrom onnx import numpy_helper\nfrom matplotlib import pyplot as"
},
{
"path": "examples/30_PyTensorRT/rebuild.sh",
"chars": 247,
"preview": "#!/bin/bash\ncd /work/build/tensorrt-laboratory/python\nmake -j\ncd /work/examples/30_PyTensorRT\nif [ ! -e infer.cpython-35"
},
{
"path": "examples/30_PyTensorRT/server.py",
"chars": 1270,
"preview": "#!/usr/bin/env python3\nimport os\nimport time\n\nimport numpy as np\n\nimport infer\nimport infer_test_utils as utils\n\n\ndef ma"
},
{
"path": "examples/90_Kubernetes/README.md",
"chars": 3543,
"preview": "# Kubernetes\n\nUsing [Kubernetes on NVIDIA GPUs, aka KONG](https://developer.nvidia.com/kubernetes-gpu) is a great\nway of"
},
{
"path": "examples/90_Kubernetes/bootstrap-minikube.sh",
"chars": 487,
"preview": "#!/bin/bash\n\nif ! [ -x \"$(command -v helm)\" ]; then\n echo 'Error: helm is not installed.' >&2\n exit 1\nfi\n\n# minikube\n("
},
{
"path": "examples/90_Kubernetes/deploy/build-and-run.sh",
"chars": 373,
"preview": "#!/bin/bash\n\ndefault_engine=/work/models/ResNet-152-b8-fp16.engine\nconcurrency=${YAIS_CONCURRENCY:-1}\nengine=${YAIS_TRT_"
},
{
"path": "examples/90_Kubernetes/devel/README.md",
"chars": 4091,
"preview": "\n## Round 1: External Service\n\nBefore deploying a YAIS service with Kubernetes, we will first setup a developer environm"
},
{
"path": "examples/90_Kubernetes/devel/yais-devel.yml",
"chars": 670,
"preview": "---\napiVersion: v1\nkind: Service\nmetadata:\n name: yais-devel\n labels:\n app: yais-devel\nspec:\n ports:\n - name: met"
},
{
"path": "examples/90_Kubernetes/istio/README.md",
"chars": 918,
"preview": "# Istio\n\n## Install\n\n```\n# Download the latest release\ncurl -L https://git.io/getLatestIstio | sh -\n\n# Istio 1.0\nhelm te"
},
{
"path": "examples/90_Kubernetes/istio/rendered/istio-v0.8-minikube.yml",
"chars": 98043,
"preview": "apiVersion: v1\nkind: Namespace\nmetadata:\n name: istio-system\n---\n# Source: istio/charts/mixer/templates/configmap.yaml\na"
},
{
"path": "examples/90_Kubernetes/istio/rendered/istio-v1.0-minikube.yml",
"chars": 119176,
"preview": "apiVersion: v1\nkind: Namespace\nmetadata:\n name: istio-system\n---\n# Source: istio/charts/galley/templates/configmap.yaml\n"
},
{
"path": "examples/90_Kubernetes/minikube/README.md",
"chars": 1205,
"preview": "# Development with Minikube\n\n## Install `minikube`, `kubectl`, and `helm`\n\nThis only need to be done one time, or period"
},
{
"path": "examples/90_Kubernetes/minikube/bootstrap.sh",
"chars": 1745,
"preview": "#!/bin/bash\n\nmkdir -p $HOME/.kube\ntouch $HOME/.kube/config\n\nexport MINIKUBE_HOME=$HOME\nexport CHANGE_MINIKUBE_NONE_USER="
},
{
"path": "examples/90_Kubernetes/prometheus/bootstrap.sh",
"chars": 425,
"preview": "#!/bin/bash\n\nkubectl create -f service-account.yml\n\nhelm init --wait --service-account tiller\n\nhelm repo add coreos http"
},
{
"path": "examples/90_Kubernetes/prometheus/custom-settings.yml",
"chars": 12294,
"preview": "global:\n rbacEnable: true\n\n#prometheus:\n# service:\n# type: NodePort\n\ngrafana:\n# image:\n# tag: 5.2.1\n service:\n "
},
{
"path": "examples/90_Kubernetes/prometheus/service-account.yml",
"chars": 502,
"preview": "# Create a service account for Helm and grant the cluster admin role.\n# It is assumed that helm should be installed with"
},
{
"path": "examples/90_Kubernetes/prometheus/yais-dashboard.json",
"chars": 6921,
"preview": "{\n \"__inputs\": [\n {\n \"name\": \"DS_YAIS\",\n \"label\": \"yais\",\n \"description\": \"\",\n \"type\": \"datasour"
},
{
"path": "examples/90_Kubernetes/prometheus/yais-metrics.yml",
"chars": 1150,
"preview": "#\n# Create a Service Account, Role, Role Binding\n# YAIS Specific Prometheus (via Operator) and Service\n#\n---\napiVersion:"
},
{
"path": "examples/90_Kubernetes/yais-deploy.yml",
"chars": 2071,
"preview": "---\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n name: yais-example\nspec:\n replicas: 1\n selector:\n matchLabels:"
},
{
"path": "examples/91_Prometheus/README.md",
"chars": 18,
"preview": "# Prometheus\n\nWIP\n"
},
{
"path": "examples/91_Prometheus/scrape.conf",
"chars": 104,
"preview": "[[inputs.prometheus]]\n urls = [\"http://localhost:50078/metrics\"]\n[[outputs.file]]\n files = [\"stdout\"]\n"
},
{
"path": "examples/97_SingleProcessMultiSteam/launch_service.sh",
"chars": 2294,
"preview": "#!/bin/bash -e\n#\n# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in sourc"
},
{
"path": "examples/98_MultiProcessSingleStream/README.md",
"chars": 1251,
"preview": "# MPS Examples\n\n`run_throughput_test ncopies batch_size engine_file MPS/NOMPS`\n\nV100 - 16GB - DGX-1V\n\nProcesses | MPS | "
},
{
"path": "examples/98_MultiProcessSingleStream/run_latency_test",
"chars": 2053,
"preview": "#!/bin/bash -e\n#\n# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in sourc"
},
{
"path": "examples/98_MultiProcessSingleStream/run_throughput_test",
"chars": 2836,
"preview": "#!/bin/bash -e\n#\n# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in sourc"
},
{
"path": "examples/98_MultiProcessSingleStream/setup.py",
"chars": 2362,
"preview": "#!/usr/bin/env python3\n#\n# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use "
},
{
"path": "examples/99_LoadBalancer/README.md",
"chars": 813,
"preview": "# Envoy Load Balancer\n\nVery basic Envoy Proxy L7 load balancer for testing purposes.\n\n`run_loadbalancer.py -n <number of"
},
{
"path": "examples/99_LoadBalancer/lb-envoy.j2",
"chars": 2679,
"preview": "{#-\n# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary "
},
{
"path": "examples/99_LoadBalancer/run_loadbalancer.py",
"chars": 3047,
"preview": "#!/usr/bin/env python3\n#\n# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use "
},
{
"path": "examples/CMakeLists.txt",
"chars": 1914,
"preview": "# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "examples/Deployment/CMakeLists.txt",
"chars": 62,
"preview": "add_subdirectory(ImageClient)\nadd_subdirectory(RouteRequests)\n"
},
{
"path": "examples/Deployment/ImageClient/CMakeLists.txt",
"chars": 815,
"preview": "set(protobuf_MODULE_COMPATIBLE TRUE)\nfind_package(Protobuf CONFIG REQUIRED)\nmessage(STATUS \"Using protobuf ${protobuf_VE"
},
{
"path": "examples/Deployment/ImageClient/api.proto",
"chars": 2740,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/Deployment/ImageClient/client.cc",
"chars": 5897,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/Deployment/ImageClient/client.h",
"chars": 2803,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/Deployment/ImageClient/client.py",
"chars": 2812,
"preview": "## Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n##\n## Redistribution and use in source and binary f"
},
{
"path": "examples/Deployment/Kubernetes/basic-trtis-deployment/deploy.yml",
"chars": 1655,
"preview": "---\napiVersion: apps/v1\nkind: Deployment\nmetadata:\n name: basic-trtis-deployment\n namespace: trtlab\nspec:\n replicas: "
},
{
"path": "examples/Deployment/Kubernetes/basic-trtis-deployment/istio-ingress.yml",
"chars": 1040,
"preview": "---\napiVersion: networking.istio.io/v1alpha3\nkind: Gateway\nmetadata:\n name: basic-trtis-deployment-gateway\n namespace:"
},
{
"path": "examples/Deployment/Kubernetes/basic-trtis-deployment/scrape-metrics.yml",
"chars": 305,
"preview": "---\napiVersion: monitoring.coreos.com/v1\nkind: ServiceMonitor\nmetadata:\n name: basic-trtis-deployment\n namespace: trtl"
},
{
"path": "examples/Deployment/ObjectStore/README.md",
"chars": 2006,
"preview": "# Object Store\n\nIn the Image Service example, the ImageClient separates out an inference request\ninto two components: \n-"
},
{
"path": "examples/Deployment/ObjectStore/create_buckets.py",
"chars": 391,
"preview": "import os\nimport boto3\n\ns3 = boto3.client(\"s3\", use_ssl=False, verify=False, \n endpoint_url=os.environ."
},
{
"path": "examples/Deployment/ObjectStore/get_rook_s3_keys.sh",
"chars": 449,
"preview": "#!/bin/bash\nobjstore=trtlab-s3\nuser=trtlab\necho -n export AWS_ACCESS_KEY_ID=\nkubectl -n rook-ceph get secret rook-ceph-o"
},
{
"path": "examples/Deployment/ObjectStore/ingress-istio.yml",
"chars": 618,
"preview": "# not working yet\n---\napiVersion: networking.istio.io/v1alpha3\nkind: Gateway\nmetadata:\n name: trtlab-s3-gateway\nspec:\n "
},
{
"path": "examples/Deployment/ObjectStore/ingress-nginx.yml",
"chars": 453,
"preview": "apiVersion: extensions/v1beta1\nkind: Ingress\nmetadata:\n annotations:\n nginx.ingress.kubernetes.io/proxy-body-size: \""
},
{
"path": "examples/Deployment/ObjectStore/rook-s3.yml",
"chars": 558,
"preview": "---\napiVersion: ceph.rook.io/v1\nkind: CephObjectStore\nmetadata:\n name: trtlab-s3\n namespace: rook-ceph\nspec:\n metadat"
},
{
"path": "examples/Deployment/README.md",
"chars": 10085,
"preview": "# Deploying Inference Services\n\nThis document/example folder is a work in progress. Its intent is to cover\nvarious aspec"
},
{
"path": "examples/Deployment/RouteRequests/CMakeLists.txt",
"chars": 155,
"preview": "add_executable(test_image_service.x\n test_service.cc\n)\n\ntarget_link_libraries(test_image_service.x\nPUBLIC\n nvrpc\n dep"
},
{
"path": "examples/Deployment/RouteRequests/README.md",
"chars": 3805,
"preview": "# Routing Requests\n\nIf we have multiple instances of TRTIS each with different models, we need a way\nto route requests t"
},
{
"path": "examples/Deployment/RouteRequests/envoy_config.yaml",
"chars": 2284,
"preview": "static_resources:\n listeners:\n - name: listener_0\n address:\n socket_address: { address: 0.0.0.0, port_value: 5"
},
{
"path": "examples/Deployment/RouteRequests/test_client.py",
"chars": 1116,
"preview": "import os\n\nimport deploy_image_client as cpp_client\n\n\ndef main():\n if not os.environ.get(\"TRTLAB_ROUTING_TEST\"):\n "
},
{
"path": "examples/Deployment/RouteRequests/test_routing.sh",
"chars": 982,
"preview": "#!/bin/bash\n\ncleanup() {\n kill $(jobs -p) ||:\n}\ntrap \"cleanup\" EXIT SIGINT SIGTERM\n\n(cd /work/build/examples/Deployment"
},
{
"path": "examples/Deployment/RouteRequests/test_service.cc",
"chars": 3861,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/Deployment/batcher.cc",
"chars": 5732,
"preview": "\n\ntemplate<typename Request, typename Response>\nclass BatchingService\n{\n public:\n using PrepareFn =\n std::fun"
},
{
"path": "examples/ONNX/resnet50/README.md",
"chars": 2454,
"preview": "# TensorRT ResNet50 Example\n\n- `fetch.sh` downloads the onnx model, test data, and calibration images from S3\n - after "
},
{
"path": "examples/ONNX/resnet50/build.py",
"chars": 3047,
"preview": "#!/usr/bin/env python3\n#\n# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use "
},
{
"path": "examples/ONNX/resnet50/calibration_images.csv",
"chars": 3123,
"preview": "url,license,label\nhttps://www.openfotos.com/pictures/red-rock-crab-1096.full.jpg,Open Fotos License,crab\nhttps://www.ope"
},
{
"path": "examples/ONNX/resnet50/calibrator.py",
"chars": 6563,
"preview": "#\n# Copyright 1993-2019 NVIDIA Corporation. All rights reserved.\n#\n# NOTICE TO LICENSEE:\n#\n# This source code and/or do"
},
{
"path": "examples/ONNX/resnet50/fetch.sh",
"chars": 701,
"preview": "#!/bin/bash\n\nif [ ! -e \"resnet50.tar.gz\" ]; then\n wget https://s3.amazonaws.com/download.onnx/models/opset_8/resnet50.t"
},
{
"path": "examples/ONNX/resnet50/imagenet_labels.py",
"chars": 30575,
"preview": "\nlabels = {0: 'tench, Tinca tinca',\n 1: 'goldfish, Carassius auratus',\n 2: 'great white shark, white shark, man-eater, m"
},
{
"path": "examples/ONNX/resnet50/int8.py",
"chars": 1093,
"preview": "import calibrator\nimport tensorrt as trt\n\n# Use TensorRT ONNX parser to parse model file, and enable INT8 calibration du"
},
{
"path": "examples/ONNX/resnet50/onnx_utils.py",
"chars": 1352,
"preview": "#!/usr/bin/env python3\nimport glob\nimport os\n\nimport onnx\nfrom onnx import numpy_helper\nfrom matplotlib import pyplot as"
},
{
"path": "examples/ONNX/resnet50/open_source_images.md5",
"chars": 60,
"preview": "6cd502bc217f3960cf34447ec4ede610 open_source_images.tar.gz\n"
},
{
"path": "examples/ONNX/resnet50/resnet50.md5",
"chars": 50,
"preview": "0e8088c7b1a1a9b2d0a5ae05601cc55e resnet50.tar.gz\n"
},
{
"path": "examples/ONNX/resnet50/run_jpeg_test.py",
"chars": 2327,
"preview": "#!/usr/bin/env python3\n\nimport os\nimport time\n\nimport trtlab\nimport onnx_utils as utils\n\nimport numpy as np\nimport matpl"
},
{
"path": "examples/ONNX/resnet50/run_onnx_tests.py",
"chars": 2319,
"preview": "#!/usr/bin/env python3\n\nimport os\n\nimport trtlab\nimport numpy as np\n\nimport click\nimport onnx_utils as utils\n\ntests = {}"
},
{
"path": "examples/nvRPC/CMakeLists.txt",
"chars": 1700,
"preview": "# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "examples/nvRPC/SharedMemoryService/CMakeLists.txt",
"chars": 1778,
"preview": "# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "examples/nvRPC/SharedMemoryService/README.md",
"chars": 983,
"preview": "# Shared Memory Service\n\nClient/Server service extending the Basic nvRPC example.\n\nThe client (`sysv-client.x`) creates "
},
{
"path": "examples/nvRPC/SharedMemoryService/client.cc",
"chars": 4852,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/nvRPC/SharedMemoryService/server.cc",
"chars": 7037,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/nvRPC/StreamingInOrderSendRecv/CMakeLists.txt",
"chars": 1857,
"preview": "# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "examples/nvRPC/StreamingInOrderSendRecv/README.md",
"chars": 526,
"preview": "# BidirectionalStream In-Order Send/Recv\n\n```\nrpc InOrderSendRecv (stream Request) returns (stream Response)\n```\n\nThe se"
},
{
"path": "examples/nvRPC/StreamingInOrderSendRecv/client.cc",
"chars": 4647,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/nvRPC/StreamingInOrderSendRecv/server.cc",
"chars": 7559,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/nvRPC/StreamingInOrderSendRecv/test.sh",
"chars": 438,
"preview": "#!/bin/bash \n\ncleanup() {\n kill $(jobs -p) ||:\n}\ntrap \"cleanup\" EXIT SIGINT SIGTERM\n\n./nvrpc-bidirectional-server.x --i"
},
{
"path": "examples/nvRPC/StreamingService/CMakeLists.txt",
"chars": 1916,
"preview": "# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "examples/nvRPC/StreamingService/README.md",
"chars": 2375,
"preview": "# Streaming Examples\n\nAsync gRPC streaming can take on many forms. nvRPC provides a set of LifeCycles\nto accommodate a v"
},
{
"path": "examples/nvRPC/StreamingService/client.cc",
"chars": 4784,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/nvRPC/StreamingService/common.h",
"chars": 2120,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/nvRPC/StreamingService/even-odds.cc",
"chars": 3641,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/nvRPC/StreamingService/ping-pong.cc",
"chars": 3296,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/nvRPC/StreamingService/test.sh",
"chars": 470,
"preview": "#!/bin/bash \n\ncleanup() {\n kill $(jobs -p) ||:\n}\ntrap \"cleanup\" EXIT SIGINT SIGTERM\n\nexport PATH=\".:$PATH\"\n\nexe=${1:-\"."
},
{
"path": "examples/nvRPC/UnaryService/CMakeLists.txt",
"chars": 1824,
"preview": "# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "examples/nvRPC/UnaryService/client.cc",
"chars": 4034,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "examples/nvRPC/UnaryService/server.cc",
"chars": 7871,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "jupyter_notebook_config.py",
"chars": 29357,
"preview": "# Configuration file for jupyter-notebook.\n\n#---------------------------------------------------------------------------"
},
{
"path": "models/README.md",
"chars": 1649,
"preview": "## Sample Models\n\nIncluded in this folder are a collection of open source models and\nsome scripts to build TensorRT engi"
},
{
"path": "models/ResNet-152-deploy.prototxt",
"chars": 98034,
"preview": "name: \"ResNet-152\"\ninput: \"data\"\ninput_dim: 1\ninput_dim: 3\ninput_dim: 224\ninput_dim: 224\n\nlayer {\n\tbottom: \"data\"\n\ttop: "
},
{
"path": "models/ResNet-50-deploy.prototxt",
"chars": 32500,
"preview": "name: \"ResNet-50\"\ninput: \"data\"\ninput_dim: 1\ninput_dim: 3\ninput_dim: 224\ninput_dim: 224\n\nlayer {\n\tbottom: \"data\"\n\ttop: \""
},
{
"path": "models/mps_builder",
"chars": 1955,
"preview": "#!/bin/bash -e\n#\n# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in sourc"
},
{
"path": "models/onnx/common.py",
"chars": 3806,
"preview": "import os\nimport argparse\nimport numpy as np\nimport pycuda.driver as cuda\nimport tensorrt as trt\n\ntry:\n # Sometimes p"
},
{
"path": "models/onnx/mnist-v1.3/test_data_set_0/output_0.pb",
"chars": 31,
"preview": "\b\u0001\b\n\u0010\u0001J(sDU\u001aČtE\u0001'DWQeYQ\u0018\u00123vNKB\u0003"
},
{
"path": "models/onnx/mnist-v1.3/test_data_set_1/output_0.pb",
"chars": 29,
"preview": "\b\u0001\b\n\u0010\u0001J(\u001cE\f\u000e_\u0001;ÁXy\u0019Ą*_DHԺÓ!9Z"
},
{
"path": "models/onnx/mnist-v1.3/test_data_set_2/output_0.pb",
"chars": 32,
"preview": "\b\u0001\b\n\u0010\u0001J(l\u0011\u00034ĐDM\u000fW\u001fD\u001b˿>'\u0002a¤&B\u00156hE"
},
{
"path": "models/onnx/onnx_builder.py",
"chars": 10142,
"preview": "# This sample uses an ONNX ResNet50 Model to create a TensorRT Inference Engine\nimport random\nfrom PIL import Image\nimpo"
},
{
"path": "models/setup.py",
"chars": 2366,
"preview": "#!/usr/bin/env python3\n#\n# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use "
},
{
"path": "notebooks/Demo Day 1.ipynb",
"chars": 3934,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"execution_count\": null,\n \"metadata\": {},\n \"outputs\": [],\n \"source\": "
},
{
"path": "notebooks/Demo Day 2.ipynb",
"chars": 3658,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"execution_count\": null,\n \"metadata\": {},\n \"outputs\": [],\n \"source\": "
},
{
"path": "notebooks/Demo Day 3.ipynb",
"chars": 2115,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"execution_count\": null,\n \"metadata\": {},\n \"outputs\": [],\n \"source\": "
},
{
"path": "notebooks/Multiple Models.ipynb",
"chars": 8231,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"execution_count\": 1,\n \"metadata\": {},\n \"outputs\": [],\n \"source\": [\n "
},
{
"path": "notebooks/Quickstart.ipynb",
"chars": 7325,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# TensorRT Runtime\\n\",\n \"\\n\",\n "
},
{
"path": "notebooks/README.md",
"chars": 173,
"preview": "# Jupyter Notebooks\n\nNote: Many of the ONNX examples will fail until TensorRT 5.1 is available in the container.\n\nThis i"
},
{
"path": "requirements.txt",
"chars": 1194,
"preview": "appdirs==1.4.3\natomicwrites==1.2.1\nattrs==18.2.0\nbackcall==0.1.0\nbleach>=3.1.1\nboto3==1.9.109\nbotocore==1.12.109\nclick=="
},
{
"path": "trtlab/BUILD.bazel",
"chars": 120,
"preview": "exports_files([\n \"core\",\n \"cuda\",\n \"nvrpc\",\n \"tensorrt\",\n ],\n visibility = [\"//visibility:public\"],\n)\n"
},
{
"path": "trtlab/CMakeLists.txt",
"chars": 1675,
"preview": "#cmake_minimum_required(VERSION 3.9 FATAL_ERROR)\n\n#project(trtlab)\n\n#include(GNUInstallDirs)\n\n#option(ENABLE_TESTING \"Bu"
},
{
"path": "trtlab/core/BUILD.bazel",
"chars": 291,
"preview": "cc_library(\n name = \"core\",\n srcs = glob([\n \"src/**/*.cc\",\n \"src/**/*.h\",\n ]),\n hdrs = glob(\n "
},
{
"path": "trtlab/core/CMakeLists.txt",
"chars": 2147,
"preview": "include(GNUInstallDirs)\nset(CMAKE_THREAD_PREFER_PTHREAD TRUE)\nfind_package(Threads)\nfind_package(cpuaff)\nfind_package(gl"
},
{
"path": "trtlab/core/benchmarks/CMakeLists.txt",
"chars": 1862,
"preview": "# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "trtlab/core/benchmarks/bench_batcher.cc",
"chars": 4427,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "trtlab/core/benchmarks/bench_memory.cc",
"chars": 3438,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "trtlab/core/benchmarks/bench_memory_stack.cc",
"chars": 6526,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "trtlab/core/benchmarks/bench_pool.cc",
"chars": 3468,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "trtlab/core/benchmarks/bench_thread_pool.cc",
"chars": 2383,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "trtlab/core/benchmarks/main.cc",
"chars": 1618,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "trtlab/core/include/trtlab/core/affinity.h",
"chars": 3933,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "trtlab/core/include/trtlab/core/async_compute.h",
"chars": 4743,
"preview": "/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Redistribution and use in source and binary f"
},
{
"path": "trtlab/core/include/trtlab/core/batcher.h",
"chars": 5219,
"preview": "#pragma once\n\n#include <chrono>\n#include <future>\n#include <memory>\n#include <optional>\n#include <queue>\n#include <vecto"
}
]
// ... and 206 more files