Repository: NVIDIA/tensorrt-laboratory Branch: v2 Commit: 33b6fdf2935c Files: 406 Total size: 2.2 MB Directory structure: gitextract_8sd61058/ ├── .bazelrc ├── .clang-format ├── .dockerignore ├── .gitmodules ├── BUILD.bazel ├── CLA ├── CMakeLists.txt ├── CREDITS.md ├── Dockerfile ├── LICENSE ├── README.md ├── WORKSPACE ├── bazel/ │ ├── BUILD.bazel │ ├── cuda_configure.bzl │ ├── repositories.bzl │ └── tensorrt_configure.bzl ├── build.sh ├── cmake/ │ ├── FindTensorRT.cmake │ ├── Findcpuaff.cmake │ ├── GRPCGenerateCPP.cmake │ ├── GRPCGenerateCPPLikeBazel.cmake │ ├── LibFindMacros.cmake │ ├── ProtobufGenerateCPPLikeBazel.cmake │ └── dependencies.cmake ├── devel.sh ├── examples/ │ ├── 00_TensorRT/ │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ ├── infer.cc │ │ └── inference.cc │ ├── 01_Basic_GRPC/ │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ └── src/ │ │ ├── async_client.cc │ │ ├── client.cpp │ │ └── server.cpp │ ├── 02_TensorRT_GRPC/ │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ └── src/ │ │ ├── async-client.cc │ │ ├── metrics.cc │ │ ├── metrics.h │ │ ├── server.cc │ │ ├── siege.cc │ │ └── sync-client.cc │ ├── 03_Batching/ │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ ├── inference-batcher.cc │ │ ├── launch_batching.sh │ │ ├── simple_batching_client.py │ │ ├── simple_pb2.py │ │ ├── simple_pb2_grpc.py │ │ ├── streaming-service.cc │ │ └── unary_client.py │ ├── 04_Middleman/ │ │ ├── CMakeLists.txt │ │ └── middleman-client.cc │ ├── 10_Internals/ │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ └── internals.cc │ ├── 11_Protos/ │ │ ├── CMakeLists.txt │ │ ├── demo/ │ │ │ ├── CMakeLists.txt │ │ │ ├── dataset.proto │ │ │ └── inference.proto │ │ ├── echo/ │ │ │ ├── CMakeLists.txt │ │ │ └── echo.proto │ │ └── inference/ │ │ ├── CMakeLists.txt │ │ ├── api.proto │ │ ├── model_config.proto │ │ ├── nvidia_inference.proto │ │ ├── request_status.proto │ │ └── server_status.proto │ ├── 12_ConfigGenerator/ │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ ├── generator.cc │ │ ├── link.sh │ │ └── ms_mgmt │ ├── 12_FlatBuffers/ │ │ ├── CMakeLists.txt │ │ ├── client.cc │ │ ├── example.fbs │ │ ├── example.grpc.fb.cc │ │ ├── example.grpc.fb.h │ │ ├── example_generated.h │ │ └── server.cc │ ├── 30_PyTensorRT/ │ │ ├── README.md │ │ ├── client.py │ │ ├── compute.py │ │ ├── infer_test_utils.py │ │ ├── rebuild.sh │ │ └── server.py │ ├── 90_Kubernetes/ │ │ ├── README.md │ │ ├── bootstrap-minikube.sh │ │ ├── deploy/ │ │ │ └── build-and-run.sh │ │ ├── devel/ │ │ │ ├── README.md │ │ │ └── yais-devel.yml │ │ ├── istio/ │ │ │ ├── README.md │ │ │ └── rendered/ │ │ │ ├── istio-v0.8-minikube.yml │ │ │ └── istio-v1.0-minikube.yml │ │ ├── minikube/ │ │ │ ├── README.md │ │ │ └── bootstrap.sh │ │ ├── prometheus/ │ │ │ ├── bootstrap.sh │ │ │ ├── custom-settings.yml │ │ │ ├── service-account.yml │ │ │ ├── yais-dashboard.json │ │ │ └── yais-metrics.yml │ │ └── yais-deploy.yml │ ├── 91_Prometheus/ │ │ ├── README.md │ │ └── scrape.conf │ ├── 97_SingleProcessMultiSteam/ │ │ └── launch_service.sh │ ├── 98_MultiProcessSingleStream/ │ │ ├── README.md │ │ ├── run_latency_test │ │ ├── run_throughput_test │ │ └── setup.py │ ├── 99_LoadBalancer/ │ │ ├── README.md │ │ ├── lb-envoy.j2 │ │ └── run_loadbalancer.py │ ├── CMakeLists.txt │ ├── Deployment/ │ │ ├── CMakeLists.txt │ │ ├── ImageClient/ │ │ │ ├── CMakeLists.txt │ │ │ ├── api.proto │ │ │ ├── client.cc │ │ │ ├── client.h │ │ │ └── client.py │ │ ├── Kubernetes/ │ │ │ └── basic-trtis-deployment/ │ │ │ ├── deploy.yml │ │ │ ├── istio-ingress.yml │ │ │ └── scrape-metrics.yml │ │ ├── ObjectStore/ │ │ │ ├── 
README.md │ │ │ ├── create_buckets.py │ │ │ ├── get_rook_s3_keys.sh │ │ │ ├── ingress-istio.yml │ │ │ ├── ingress-nginx.yml │ │ │ └── rook-s3.yml │ │ ├── README.md │ │ ├── RouteRequests/ │ │ │ ├── CMakeLists.txt │ │ │ ├── README.md │ │ │ ├── envoy_config.yaml │ │ │ ├── test_client.py │ │ │ ├── test_routing.sh │ │ │ └── test_service.cc │ │ └── batcher.cc │ ├── ONNX/ │ │ └── resnet50/ │ │ ├── README.md │ │ ├── build.py │ │ ├── calibration_images.csv │ │ ├── calibrator.py │ │ ├── fetch.sh │ │ ├── imagenet_labels.py │ │ ├── int8.py │ │ ├── onnx_utils.py │ │ ├── open_source_images.md5 │ │ ├── resnet50.md5 │ │ ├── run_jpeg_test.py │ │ └── run_onnx_tests.py │ └── nvRPC/ │ ├── CMakeLists.txt │ ├── SharedMemoryService/ │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ ├── client.cc │ │ └── server.cc │ ├── StreamingInOrderSendRecv/ │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ ├── client.cc │ │ ├── server.cc │ │ └── test.sh │ ├── StreamingService/ │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ ├── client.cc │ │ ├── common.h │ │ ├── even-odds.cc │ │ ├── ping-pong.cc │ │ └── test.sh │ └── UnaryService/ │ ├── CMakeLists.txt │ ├── client.cc │ └── server.cc ├── jupyter_notebook_config.py ├── models/ │ ├── README.md │ ├── ResNet-152-deploy.prototxt │ ├── ResNet-50-deploy.prototxt │ ├── mps_builder │ ├── onnx/ │ │ ├── common.py │ │ ├── mnist-v1.3/ │ │ │ ├── model.onnx │ │ │ ├── test_data_set_0/ │ │ │ │ ├── input_0.pb │ │ │ │ └── output_0.pb │ │ │ ├── test_data_set_1/ │ │ │ │ ├── input_0.pb │ │ │ │ └── output_0.pb │ │ │ └── test_data_set_2/ │ │ │ ├── input_0.pb │ │ │ └── output_0.pb │ │ └── onnx_builder.py │ └── setup.py ├── notebooks/ │ ├── Demo Day 1.ipynb │ ├── Demo Day 2.ipynb │ ├── Demo Day 3.ipynb │ ├── Multiple Models.ipynb │ ├── Quickstart.ipynb │ └── README.md ├── requirements.txt └── trtlab/ ├── BUILD.bazel ├── CMakeLists.txt ├── core/ │ ├── BUILD.bazel │ ├── CMakeLists.txt │ ├── benchmarks/ │ │ ├── CMakeLists.txt │ │ ├── bench_batcher.cc │ │ ├── bench_memory.cc │ │ ├── bench_memory_stack.cc │ │ ├── bench_pool.cc │ │ ├── bench_thread_pool.cc │ │ └── main.cc │ ├── include/ │ │ └── trtlab/ │ │ └── core/ │ │ ├── affinity.h │ │ ├── async_compute.h │ │ ├── batcher.h │ │ ├── cyclic_buffer.h │ │ ├── cyclic_windowed_buffer.h │ │ ├── dispatcher.h │ │ ├── fiber_group.h │ │ ├── hybrid_condition.h │ │ ├── hybrid_mutex.h │ │ ├── memory/ │ │ │ └── first_touch_allocator.h │ │ ├── pool.h │ │ ├── ranges.h │ │ ├── resources.h │ │ ├── standard_threads.h │ │ ├── task_pool.h │ │ ├── thread_pool.h │ │ ├── types.h │ │ ├── userspace_threads.h │ │ └── utils.h │ ├── src/ │ │ ├── affinity.cc │ │ ├── cyclic_buffer.cc │ │ ├── cyclic_windowed_buffer.cc │ │ ├── memory/ │ │ │ ├── copy.cc │ │ │ ├── host_memory.cc │ │ │ ├── malloc.cc │ │ │ ├── memory.cc │ │ │ ├── sysv_allocator.cc │ │ │ └── tensor_shape.cc │ │ ├── types.cc │ │ └── utils.cc │ └── tests/ │ ├── BUILD.bazel │ ├── CMakeLists.txt │ ├── test_affinity.cc │ ├── test_async.cc │ ├── test_async_compute.cc │ ├── test_batcher.cc │ ├── test_common.cc │ ├── test_common.h │ ├── test_cyclic_allocator.cc │ ├── test_cyclic_windowed_buffer.cc │ ├── test_foo_memory.cc │ ├── test_main.cc │ ├── test_memory.cc │ ├── test_memory_old.cc │ ├── test_memory_stack.cc │ ├── test_pool.cc │ ├── test_stl_allocator.cc │ ├── test_sysv_allocator.cc │ ├── test_tensor.cc │ ├── test_thread_pool.cc │ ├── test_transactional_allocator.h │ └── test_types.cc ├── cuda/ │ ├── BUILD.bazel │ ├── CMakeLists.txt │ ├── benchmarks/ │ │ ├── CMakeLists.txt │ │ ├── bench_cuda_memory.cc │ │ └── bench_main.cc │ ├── include/ │ │ 
└── trtlab/ │ │ └── cuda/ │ │ ├── common.h │ │ ├── cyclic_windowed_buffer.h │ │ ├── device_guard.h │ │ ├── device_info.h │ │ ├── memory/ │ │ │ ├── cuda_allocators.h │ │ │ └── device_memory.h │ │ └── sync.h │ ├── src/ │ │ ├── copy.cc │ │ ├── cuda_allocators.cc │ │ ├── device_guard.cc │ │ └── device_info.cc │ └── tests/ │ ├── CMakeLists.txt │ ├── test_allocators.cc │ ├── test_device_info.cc │ ├── test_main.cc │ └── test_memory.cc ├── memory/ │ ├── CMakeLists.txt │ ├── benchmarks/ │ │ ├── CMakeLists.txt │ │ ├── bench_memory.cc │ │ ├── bench_memory_pool.cc │ │ └── main.cc │ ├── cmake/ │ │ ├── configuration.cmake │ │ └── dependencies.cmake │ ├── include/ │ │ └── trtlab/ │ │ └── memory/ │ │ ├── align.h │ │ ├── allocator.h │ │ ├── allocator_storage.h │ │ ├── allocator_traits.h │ │ ├── bfit_allocator.h │ │ ├── block_allocators.h │ │ ├── block_arena.h │ │ ├── block_manager.h │ │ ├── block_stack.h │ │ ├── config.h │ │ ├── debugging.h │ │ ├── deleter.h │ │ ├── descriptor.h │ │ ├── detail/ │ │ │ ├── assert.h │ │ │ ├── block_list.h │ │ │ ├── container_node_sizes.h │ │ │ ├── debug_helpers.h │ │ │ ├── free_list.h │ │ │ ├── memory_stack.h │ │ │ ├── page_info.h │ │ │ ├── ranges.h │ │ │ └── utility.h │ │ ├── error.h │ │ ├── huge_page_allocator.h │ │ ├── literals.h │ │ ├── malloc_allocator.h │ │ ├── memory_block.h │ │ ├── memory_pool.h │ │ ├── memory_resource.h │ │ ├── memory_type.h │ │ ├── memory_typed_allocator.h │ │ ├── posix_aligned_allocator.h │ │ ├── raii_allocator.h │ │ ├── smart_ptr.h │ │ ├── std_allocator.h │ │ ├── threading.h │ │ ├── trackers.h │ │ ├── tracking.h │ │ ├── transactional_allocator.h │ │ └── utils.h │ ├── src/ │ │ ├── CMakeLists.txt │ │ ├── align.cc │ │ ├── block_stack.cc │ │ ├── config.h.in │ │ ├── descriptor.cc │ │ ├── detail/ │ │ │ ├── block_list.cc │ │ │ ├── free_list.cc │ │ │ ├── free_list_utils.h │ │ │ └── page_info.c │ │ ├── error.cc │ │ ├── ilog2.h │ │ ├── memory_type.cc │ │ ├── trackers.cc │ │ └── utils.cc │ ├── tests/ │ │ ├── CMakeLists.txt │ │ ├── test_main.cc │ │ └── test_memory.cc │ └── tools/ │ ├── CMakeLists.txt │ ├── node_size_debugger.cpp │ ├── node_size_debugger.hpp │ └── test_types.hpp ├── nvrpc/ │ ├── BUILD.bazel │ ├── CMakeLists.txt │ ├── include/ │ │ └── nvrpc/ │ │ ├── client/ │ │ │ ├── base_context.h │ │ │ ├── client_single_up_multiple_down.h │ │ │ ├── client_streaming.h │ │ │ ├── client_streaming_v2.h │ │ │ ├── client_streaming_v3.h │ │ │ ├── client_unary.h │ │ │ ├── client_unary_v2.h │ │ │ └── executor.h │ │ ├── context.h │ │ ├── executor.h │ │ ├── fiber/ │ │ │ └── executor.h │ │ ├── interfaces.h │ │ ├── life_cycle_batching.h │ │ ├── life_cycle_bidirectional.h │ │ ├── life_cycle_streaming.h │ │ ├── life_cycle_unary.h │ │ ├── rpc.h │ │ ├── server.h │ │ └── service.h │ ├── src/ │ │ ├── client/ │ │ │ └── client_executor.cc │ │ ├── executor.cc │ │ └── server.cc │ └── tests/ │ ├── CMakeLists.txt │ ├── test_build_client.h │ ├── test_build_server.h │ ├── test_pingpong.cc │ ├── test_pingpong.h │ ├── test_resources.cc │ ├── test_resources.h │ ├── test_server.cc │ └── testing.proto ├── pybind/ │ ├── CMakeLists.txt │ └── trtlab/ │ ├── CMakeLists.txt │ ├── infer.cc │ ├── utils.cc │ └── utils.h └── tensorrt/ ├── BUILD.bazel ├── CMakeLists.txt ├── include/ │ └── trtlab/ │ └── tensorrt/ │ ├── allocator.h │ ├── bindings.h │ ├── buffers.h │ ├── common.h │ ├── execution_context.h │ ├── infer_bench.h │ ├── infer_runner.h │ ├── inference_manager.h │ ├── model.h │ ├── runtime.h │ ├── utils.h │ └── workspace.h ├── src/ │ ├── allocator.cc │ ├── bindings.cc │ ├── buffers.cc │ ├── 
execution_context.cc │ ├── infer_bench.cc │ ├── inference_manager.cc │ ├── model.cc │ ├── runtime.cc │ ├── utils.cc │ └── workspace.cc └── tests/ ├── CMakeLists.txt └── test_buffers.cc ================================================ FILE CONTENTS ================================================ ================================================ FILE: .bazelrc ================================================ build --cxxopt=-std=c++1z build --incompatible_remove_native_http_archive=false build --incompatible_package_name_is_a_function=false ================================================ FILE: .clang-format ================================================ #BasedOnStyle: Google Language: Cpp # BasedOnStyle: LLVM AccessModifierOffset: -2 AlignAfterOpenBracket: Align AlignConsecutiveAssignments: false AlignConsecutiveDeclarations: false AlignEscapedNewlinesLeft: true AlignOperands: true AlignTrailingComments: false AllowAllParametersOfDeclarationOnNextLine: true AllowShortBlocksOnASingleLine: false AllowShortCaseLabelsOnASingleLine: false AllowShortIfStatementsOnASingleLine: true AllowShortFunctionsOnASingleLine: true AllowShortLoopsOnASingleLine: true AlwaysBreakAfterDefinitionReturnType: None AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: false AlwaysBreakTemplateDeclarations: true BinPackArguments: true BinPackParameters: true BraceWrapping: AfterClass: true AfterControlStatement: true AfterEnum: true AfterFunction: true AfterNamespace: false AfterObjCDeclaration: false AfterStruct: true AfterUnion: true BeforeCatch: true BeforeElse: true IndentBraces: false BreakBeforeBinaryOperators: None BreakBeforeBraces: Custom BreakBeforeTernaryOperators: true BreakConstructorInitializersBeforeComma: false BreakAfterJavaFieldAnnotations: false BreakStringLiterals: true ColumnLimit: 100 CommentPragmas: '^ IWYU pragma:' ConstructorInitializerAllOnOneLineOrOnePerLine: false ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 Cpp11BracedListStyle: true DerivePointerAlignment: false DisableFormat: false ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] IncludeCategories: - Regex: '^"(llvm|llvm-c|clang|clang-c)/' Priority: 2 - Regex: '^(<|"(gtest|isl|json)/)' Priority: 3 - Regex: '.*' Priority: 1 IncludeIsMainRegex: '$' IndentCaseLabels: true IndentWidth: 4 IndentWrappedFunctionNames: true JavaScriptQuotes: Leave KeepEmptyLinesAtTheStartOfBlocks: false MacroBlockBegin: '' MacroBlockEnd: '' MaxEmptyLinesToKeep: 1 NamespaceIndentation: None PenaltyBreakBeforeFirstCallParameter: 19 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 100 PenaltyBreakString: 1000 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 60 PointerAlignment: Left ReflowComments: true SortIncludes: true SpaceAfterCStyleCast: false SpaceAfterTemplateKeyword: false SpaceBeforeAssignmentOperators: true SpaceBeforeParens: Never SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 1 SpacesInAngles: false SpacesInContainerLiterals: false SpacesInCStyleCastParentheses: false SpacesInParentheses: false SpacesInSquareBrackets: false Standard: Cpp11 TabWidth: 4 UseTab: Never ================================================ FILE: .dockerignore ================================================ build *.engine models @eaDir __pycache__ bazel-* ================================================ FILE: .gitmodules ================================================ [submodule "third_party/cpuaff"] path = third_party/cpuaff url = https://github.com/dcdillon/cpuaff [submodule "third_party/gflags"] path = 
third_party/gflags url = https://github.com/gflags/gflags.git [submodule "third_party/glog"] path = third_party/glog url = https://github.com/google/glog.git [submodule "third_party/grpc"] path = third_party/grpc url = https://github.com/grpc/grpc [submodule "third_party/wait-for-it"] path = third_party/wait-for-it url = https://github.com/vishnubob/wait-for-it [submodule "third_party/benchmark"] path = third_party/benchmark url = https://github.com/google/benchmark.git [submodule "third_party/googletest"] path = third_party/googletest url = https://github.com/google/googletest.git [submodule "third_party/pybind11"] path = third_party/pybind11 url = https://github.com/pybind/pybind11.git [submodule "third_party/flatbuffers"] path = third_party/flatbuffers url = https://github.com/google/flatbuffers.git ================================================ FILE: BUILD.bazel ================================================ package(default_visibility = ["//visibility:public"]) ================================================ FILE: CLA ================================================ The NVIDIA TensorRT Laboratory Software Grant and Corporate Contributor License Agreement ("Agreement") Thank you for your interest in the NVIDIA TensorRT Laboratory Project (the "Project"). In order to clarify the intellectual property license granted with Contributions from any person or entity, NVIDIA Corporation (the “Copyright Holders") must have a Contributor License Agreement (CLA) on file that has been signed by each Contributor, indicating agreement to the license terms below. This license is for your protection as a Contributor as well as the protection of the Project and its users; it does not change your rights to use your own Contributions for any other purpose. This version of the Agreement allows an entity (the "Corporation") to submit Contributions to the Project, to authorize Contributions submitted by its designated employees to the Project, and to grant copyright and patent licenses thereto to the Copyright Holders. If you have not already done so, please complete and sign, then scan and email a pdf file of this Agreement to rolson@nvidia.com. Please read this document carefully before signing and keep a copy for your records. Corporation name: ________________________________________________ Corporation address: ________________________________________________ ________________________________________________ ________________________________________________ Point of Contact: ________________________________________________ E-Mail: ________________________________________________ Telephone: _____________________ Fax: _____________________ You accept and agree to the following terms and conditions for Your present and future Contributions submitted to the Project. In return, the Copyright Holders shall not use Your Contributions in a way that is contrary to the public benefit or inconsistent with its nonprofit status and bylaws in effect at the time of the Contribution. Except for the license granted herein to the Copyright Holders and recipients of software distributed by the Copyright Holders, You reserve all right, title, and interest in and to Your Contributions. 1. Definitions. "You" (or "Your") shall mean the copyright owner or legal entity authorized by the copyright owner that is making this Agreement with the Copyright Holders. 
For legal entities, the entity making a Contribution and all other entities that control, are controlled by, or are under common control with that entity are considered to be a single Contributor. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "Contribution" shall mean the code, documentation or other original works of authorship expressly identified in Schedule B, as well as any original work of authorship, including any modifications or additions to an existing work, that is intentionally submitted by You to the Copyright Holders for inclusion in, or documentation of, any of the products owned or managed by the Copyright Holders (the "Work"). For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Copyright Holders or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Copyright Holders for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by You as "Not a Contribution." 2. Grant of Copyright License. Subject to the terms and conditions of this Agreement, You hereby grant to the Copyright Holders and to recipients of software distributed by the Copyright Holders a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense, and distribute Your Contributions and such derivative works. 3. Grant of Patent License. Subject to the terms and conditions of this Agreement, You hereby grant to the Copyright Holders and to recipients of software distributed by the Copyright Holders a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by You that are necessarily infringed by Your Contribution(s) alone or by combination of Your Contribution(s) with the Work to which such Contribution(s) were submitted. If any entity institutes patent litigation against You or any other entity (including a cross-claim or counterclaim in a lawsuit) alleging that your Contribution, or the Work to which you have contributed, constitutes direct or contributory patent infringement, then any patent licenses granted to that entity under this Agreement for that Contribution or Work shall terminate as of the date such litigation is filed. 4. You represent that You are legally entitled to grant the above license. You represent further that each employee of the Corporation designated on Schedule A below (or in a subsequent written modification to that Schedule) is authorized to submit Contributions on behalf of the Corporation. 5. You represent that each of Your Contributions is Your original creation (see section 7 for submissions on behalf of others). 6. You are not expected to provide support for Your Contributions, except to the extent You desire to provide support. You may provide support for free, for a fee, or not at all. 
Unless required by applicable law or agreed to in writing, You provide Your Contributions on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. 7. Should You wish to submit work that is not Your original creation, You may submit it to the Copyright Holders separately from any Contribution, identifying the complete details of its source and of any license or other restriction (including, but not limited to, related patents, trademarks, and license agreements) of which you are personally aware, and conspicuously marking the work as "Submitted on behalf of a third-party: [named here]". 8. It is your responsibility to notify the Copyright Holders when any change is required to the list of designated employees authorized to submit Contributions on behalf of the Corporation, or to the Corporation's Point of Contact with the Copyright Holders. Please sign: __________________________________ Date: _______________ Title: __________________________________ Corporation: __________________________________ Schedule A [Initial list of designated employees. NB: authorization is not tied to particular Contributions.] Schedule B [Identification of optional concurrent software grant. Would be left blank or omitted if there is no concurrent software grant.] ================================================ FILE: CMakeLists.txt ================================================ # Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of NVIDIA CORPORATION nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)

option (BUILD_DEPENDENCIES "Whether or not a superbuild should be invoked" ON)
option (BUILD_MEMORY "Whether or not to build trtlab/memory" ON)
option (BUILD_CORE "Whether or not to build trtlab/core" ON)
option (BUILD_CUDA "Whether or not to build trtlab/cuda" ON)
option (BUILD_NVRPC "Whether or not to build trtlab/nvrpc" ON)
option (BUILD_TENSORRT "Whether or not to build trtlab/tensorrt" ON)
option (BUILD_PYTHON "Whether or not to build trtlab/pybind" OFF)
option (BUILD_EXAMPLES "Whether or not to build trtlab examples" OFF)

if (BUILD_DEPENDENCIES)
  project (trtlab_dependencies NONE)
  include (cmake/dependencies.cmake)
  return() # stop processing this file further
else()
  project (trtlab)
endif()

# CMake path
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
list(APPEND CMAKE_PREFIX_PATH "${PROJECT_SOURCE_DIR}/cmake")

# trtlab
add_subdirectory(trtlab)

if (BUILD_EXAMPLES)
  add_subdirectory(examples)
endif()


================================================
FILE: CREDITS.md
================================================
`ThreadPool` class was derived from https://github.com/progschj/ThreadPool

> Copyright (c) 2012 Jakob Progsch, Václav Zeman
>
> This software is provided 'as-is', without any express or implied
> warranty. In no event will the authors be held liable for any damages
> arising from the use of this software.
>
> Permission is granted to anyone to use this software for any purpose,
> including commercial applications, and to alter it and redistribute it
> freely, subject to the following restrictions:
>
> 1. The origin of this software must not be misrepresented; you must not
> claim that you wrote the original software. If you use this software
> in a product, an acknowledgment in the product documentation would be
> appreciated but is not required.
>
> 2. Altered source versions must be plainly marked as such, and must not be
> misrepresented as being the original software.
>
> 3. This notice may not be removed or altered from any source
> distribution.
>
> Modifications to the original work include:
> * Header-only file was split into .h/.cc files
> * Added an extra safety check (lines 30-31) in the constructor (.cc file).
> * Added CPU affinity options to the constructor

-----

`cpuaff` is distributed unmodified from the original in
[`third-party/cpuaff`](https://github.com/dcdillon/cpuaff)

> Copyright (c) 2015, Daniel C. Dillon
> All rights reserved.
>
> Redistribution and use in source and binary forms, with or without
> modification, are permitted provided that the following conditions are met:
>
> * Redistributions of source code must retain the above copyright notice, this
> list of conditions and the following disclaimer.
>
> * Redistributions in binary form must reproduce the above copyright notice,
> this list of conditions and the following disclaimer in the documentation
> and/or other materials provided with the distribution.
>
> * Neither the name of cpuaff nor the names of its
> contributors may be used to endorse or promote products derived from
> this software without specific prior written permission.
>
> THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
> AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
> DISCLAIMED.
> IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
> FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
> SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
> CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
> OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-----

`wait-for-it.sh` is distributed unmodified from the original in
[`third-party/wait-for-it`](https://github.com/vishnubob/wait-for-it).

> The MIT License (MIT)
> Copyright (c) 2016 Giles Hall
>
> Permission is hereby granted, free of charge, to any person obtaining a copy of
> this software and associated documentation files (the "Software"), to deal in
> the Software without restriction, including without limitation the rights to
> use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
> of the Software, and to permit persons to whom the Software is furnished to do
> so, subject to the following conditions:
>
> The above copyright notice and this permission notice shall be included in all
> copies or substantial portions of the Software.
>
> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> SOFTWARE.

---

Example gRPC client code was used with modification from the
[gRPC project](https://github.com/grpc/grpc), specifically the
[synchronous c++ client](https://github.com/grpc/grpc/blob/master/examples/cpp/helloworld/greeter_client.cc)

> Copyright 2015 gRPC authors.
>
> Licensed under the Apache License, Version 2.0 (the "License");
> you may not use this file except in compliance with the License.
> You may obtain a copy of the License at
>
> http://www.apache.org/licenses/LICENSE-2.0
>
> Unless required by applicable law or agreed to in writing, software
> distributed under the License is distributed on an "AS IS" BASIS,
> WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> See the License for the specific language governing permissions and
> limitations under the License.

---

[moodycamel::ConcurrentQueue](https://github.com/cameron314/concurrentqueue) is
added unmodified to the Docker images and loaded into the `playground` namespace.

> Simplified BSD License:
>
> Copyright (c) 2013-2016, Cameron Desrochers. All rights reserved.
>
> Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
>
> Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
> Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
> THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

---

`transwarp` is used unmodified from the original
[bloomen/transwarp](https://github.com/bloomen/transwarp)

> MIT License
>
> Copyright (c) 2018-2019 Christian Blume, Guan Wang
>
> Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
>
> The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
>
> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

---

Caffe ResNet-50 and ResNet-152 models from
[KaimingHe/deep-residual-networks](https://github.com/KaimingHe/deep-residual-networks)
are included without modification.

> The MIT License (MIT)
>
> Copyright (c) 2016 Shaoqing Ren
>
> Permission is hereby granted, free of charge, to any person obtaining a copy
> of this software and associated documentation files (the "Software"), to deal
> in the Software without restriction, including without limitation the rights
> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> copies of the Software, and to permit persons to whom the Software is
> furnished to do so, subject to the following conditions:
>
> The above copyright notice and this permission notice shall be included in all
> copies or substantial portions of the Software.
>
> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> SOFTWARE.
---


================================================
FILE: Dockerfile
================================================
# stage 1 - development container
# holds the core nvidia libraries but does not contain the project source code.
# use this container for development by mapping your source into the container,
# which persists your source code outside of the container lifecycle
FROM nvcr.io/nvidia/tensorrt:20.06-py3 AS base

RUN apt update
RUN apt install -y clang-format libssl-dev openssl libz-dev software-properties-common

# remove base cmake
RUN apt remove --purge -y cmake
RUN apt autoremove -y
RUN apt autoclean -y

# install cmake ppa from kitware - https://apt.kitware.com/
RUN apt install -y apt-transport-https ca-certificates gnupg software-properties-common wget
RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | apt-key add -
RUN apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main'
RUN apt update && apt install -y cmake

# then remove FindGTest.cmake installed by cmake
RUN find / -name "FindGTest.cmake" -exec rm -f {} \;

# add cufft and nvml to the container image
RUN apt install -y libcufft-dev-11-0 cuda-nvml-dev-11-0

# override some envs
ENV LD_LIBRARY_PATH=/externals/myelin/x86_64/cuda-11.0/lib:/externals/cudnn/x86_64/8.0/cuda-11.0/lib64:/usr/local/cuda-11.0/targets/x86_64-linux/lib
ENV CCACHE_DIR=/tmp/.ccache
RUN cd /usr/lib/x86_64-linux-gnu && ln -s libnvidia-ml.so.1 libnvidia-ml.so

# stage 2: build the project inside the dev container
FROM base AS trtlab
WORKDIR /work
COPY . .
RUN mkdir build && cd build && cmake .. && make -j


================================================
FILE: LICENSE
================================================
BSD 3-Clause License

Copyright (c) 2018-2019, NVIDIA Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

* Neither the name of the copyright holder nor the names of its
  contributors may be used to endorse or promote products derived from
  this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


================================================
FILE: README.md
================================================
# TensorRT Laboratory

The TensorRT Laboratory (trtlab) is a general-purpose set of tools to build
custom inference applications and services.
For a professional-grade production inference server, see
[Triton](https://github.com/nvidia/triton).

This project is broken into five primary components:

* `memory`, based on [foonathan/memory](https://github.com/foonathan/memory),
  is designed for writing custom allocators for both host and GPU memory.
  Several custom allocators are included.

* `core` contains host/cpu-side tools for common components such as thread
  pools, resource pools, and userspace threading based on Boost fibers.

* `cuda` extends `memory` with a new memory_type for CUDA device memory. All
  custom allocators in `memory` can be used with `device_memory`,
  `device_managed_memory`, or `host_pinned_memory`.

* `nvrpc` is an abstraction layer for building asynchronous microservices.
  The current implementation is based on gRPC.

* `tensorrt` provides an opinionated runtime built on the TensorRT API.

## Quickstart

The easiest way to manage the external NVIDIA dependencies is to leverage the
containers hosted on [NGC](https://ngc.nvidia.com). For bare-metal installs,
use the `Dockerfile` as a template for which NVIDIA libraries to install.

```
docker build -t trtlab .
```

For development purposes, the following set of commands first builds the base
image, then maps the source code on the host into a running container.

```
docker build -t trtlab:dev --target base .
docker run --rm -ti --gpus=all -v $PWD:/work --workdir=/work --net=host trtlab:dev bash
```

## Copyright and License

This project is released under the [BSD 3-clause license](LICENSE).

## Issues and Contributing

* Please report problems or request features by [filing a new issue](https://github.com/NVIDIA/tensorrt-laboratory/issues/new)
* You can contribute by opening a [pull request](https://help.github.com/articles/using-pull-requests/)

Pull requests with changes of 10 lines or more will require a
[Contributor License Agreement](CLA).


================================================
FILE: WORKSPACE
================================================
workspace(name = "com_github_nvidia_trtlab")

load(":bazel/repositories.bzl", "repositories")
repositories()

load ("//bazel:cuda_configure.bzl", "cuda_configure")
cuda_configure(name = "local_config_cuda")

load ("//bazel:tensorrt_configure.bzl", "tensorrt_configure")
tensorrt_configure(name = "local_config_tensorrt")

load("@com_github_grpc_grpc//bazel:grpc_deps.bzl", "grpc_deps")
grpc_deps()


================================================
FILE: bazel/BUILD.bazel
================================================
exports_files(
    glob(["*.bzl"]),
    visibility = ["//visibility:public"],
)


================================================
FILE: bazel/cuda_configure.bzl
================================================
"""Build rule generator for locally installed CUDA toolkit and cuDNN SDK."""

# src: https://github.com/google/nvidia_libs_test

def _get_env_var(repository_ctx, name, default):
    if name in repository_ctx.os.environ:
        return repository_ctx.os.environ[name]
    return default

def _impl(repository_ctx):
    cuda_path = _get_env_var(repository_ctx, "CUDA_PATH", "/usr/local/cuda")
    cudnn_path = _get_env_var(repository_ctx, "CUDNN_PATH", cuda_path)
    print("Using CUDA from %s\n" % cuda_path)
    print("Using cuDNN from %s\n" % cudnn_path)
    repository_ctx.symlink(cuda_path, "cuda")
    repository_ctx.symlink(cudnn_path, "cudnn")
    repository_ctx.file("nvcc.sh", """
#!/bin/bash
repo_path=%s
compiler=${CC:+"--compiler-bindir=$CC"}
$repo_path/cuda/bin/nvcc $compiler --compiler-options=-fPIC --include-path=$repo_path $*
""" % repository_ctx.path("."))
    repository_ctx.file("BUILD", """
package(default_visibility = ["//visibility:public"])

sh_binary(
    name = "nvcc",
    srcs = ["nvcc.sh"],
)

# The *_headers cc_library rules below aren't cc_inc_library rules because
# dependent targets would only see the first one.
cc_library(
    name = "cuda_headers",
    hdrs = glob(
        include = ["cuda/include/**/*.h*"],
        exclude = ["cuda/include/cudnn.h"]
    ),
    # Allows including CUDA headers with angle brackets.
    includes = ["cuda/include"],
)

cc_library(
    name = "cuda",
    srcs = ["cuda/lib64/stubs/libcuda.so"],
    linkopts = ["-ldl"],
)

cc_library(
    name = "cuda_runtime",
    srcs = ["cuda/lib64/libcudart_static.a"],
    deps = [":cuda"],
    linkopts = ["-lrt"],
)

cc_library(
    name = "curand_static",
    srcs = [
        "cuda/lib64/libcurand_static.a",
        "cuda/lib64/libculibos.a",
    ],
)

cc_library(
    name = "cupti_headers",
    hdrs = glob(["cuda/extras/CUPTI/include/**/*.h"]),
    # Allows including CUPTI headers with angle brackets.
    includes = ["cuda/extras/CUPTI/include"],
)

cc_library(
    name = "cupti",
    srcs = glob(["cuda/extras/CUPTI/lib64/libcupti.so*"]),
)

cc_library(
    name = "cudnn",
    srcs = [
        "cudnn/lib64/libcudnn_static.a",
        "cuda/lib64/libcublas_static.a",
        "cuda/lib64/libculibos.a",
    ],
    hdrs = ["cudnn/include/cudnn.h"],
    deps = [
        ":cuda",
        ":cuda_headers"
    ],
)

cc_library(
    name = "cuda_util",
    deps = [":cuda_util_compile"],
)
""")

cuda_configure = repository_rule(
    implementation = _impl,
    environ = ["CUDA_PATH", "CUDNN_PATH"],
)


================================================
FILE: bazel/repositories.bzl
================================================
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")

def repositories():
    _maybe(
        http_archive,
        name = "com_github_antonovvk_bazel_rules",
        sha256 = "ba75b07d3fd297375a6688e9a16583eb616e7a74b3d5e8791e7a222cf36ab26e",
        strip_prefix = "bazel_rules-98ddd7e4f7c63ea0868f08bcc228463dac2f9f12",
        urls = [
            "https://mirror.bazel.build/github.com/antonovvk/bazel_rules/archive/98ddd7e4f7c63ea0868f08bcc228463dac2f9f12.tar.gz",
            "https://github.com/antonovvk/bazel_rules/archive/98ddd7e4f7c63ea0868f08bcc228463dac2f9f12.tar.gz",
        ],
    )

    _maybe(
        http_archive,
        name = "com_github_gflags_gflags",
        sha256 = "6e16c8bc91b1310a44f3965e616383dbda48f83e8c1eaa2370a215057b00cabe",
        strip_prefix = "gflags-77592648e3f3be87d6c7123eb81cbad75f9aef5a",
        urls = [
            "https://mirror.bazel.build/github.com/gflags/gflags/archive/77592648e3f3be87d6c7123eb81cbad75f9aef5a.tar.gz",
            "https://github.com/gflags/gflags/archive/77592648e3f3be87d6c7123eb81cbad75f9aef5a.tar.gz",
        ],
    )

    _maybe(
        http_archive,
        name = "com_google_glog",
        sha256 = "1ee310e5d0a19b9d584a855000434bb724aa744745d5b8ab1855c85bff8a8e21",
        strip_prefix = "glog-028d37889a1e80e8a07da1b8945ac706259e5fd8",
        urls = [
            "https://mirror.bazel.build/github.com/google/glog/archive/028d37889a1e80e8a07da1b8945ac706259e5fd8.tar.gz",
            "https://github.com/google/glog/archive/028d37889a1e80e8a07da1b8945ac706259e5fd8.tar.gz",
        ],
    )

    _maybe(
        http_archive,
        name = "com_google_googletest",
        sha256 = "c18f281fd6621bb264570b99860a0241939b4a251c9b1af709b811d33bc63af8",
        strip_prefix = "googletest-e3bd4cbeaeef3cee65a68a8bd3c535cb779e9b6d",
        urls = [
            "https://mirror.bazel.build/github.com/google/googletest/archive/e3bd4cbeaeef3cee65a68a8bd3c535cb779e9b6d.tar.gz",
            "https://github.com/google/googletest/archive/e3bd4cbeaeef3cee65a68a8bd3c535cb779e9b6d.tar.gz",
        ],
    )
"com_github_grpc_grpc", strip_prefix = "grpc-1.16.1", urls = [ "https://github.com/grpc/grpc/archive/v1.16.1.tar.gz", ], ) def load_trtis(): http_archive( name = "com_github_nvidia_trtis", strip_prefix = "tensorrt-inference-server-0.9.0", urls = [ "https://github.com/NVIDIA/tensorrt-inference-server/archive/v0.9.0.tar.gz", ], ) def load_benchmark(): http_archive( name = "com_github_google_benchmark", sha256 = "f8e525db3c42efc9c7f3bc5176a8fa893a9a9920bbd08cef30fb56a51854d60d", strip_prefix = "benchmark-1.4.1", urls = [ "https://github.com/google/benchmark/archive/v1.4.1.tar.gz", ], ) def _maybe(repo_rule, name, **kwargs): if name not in native.existing_rules(): repo_rule(name = name, **kwargs) ================================================ FILE: bazel/tensorrt_configure.bzl ================================================ """Build rule generator for locally installed TensorRT.""" # inspired from: https://github.com/google/nvidia_libs_test def _get_env_var(repository_ctx, name, default): if name in repository_ctx.os.environ: return repository_ctx.os.environ[name] return default def _impl(repository_ctx): hdrs_path = _get_env_var(repository_ctx, "TENSORRT_HDRS_PATH", "/usr/include/x86_64-linux-gnu") libs_path = _get_env_var(repository_ctx, "TENSORRT_LIBS_PATH", "/usr/lib/x86_64-linux-gnu") print("Using TensorRT Headers from %s\n" % hdrs_path) print("Using TensorRT Libs from %s\n" % libs_path) repository_ctx.symlink(hdrs_path, "include") repository_ctx.symlink(libs_path, "libs") repository_ctx.file("BUILD", """ package(default_visibility = ["//visibility:public"]) # The *_headers cc_library rules below aren't cc_inc_library rules because # dependent targets would only see the first one. cc_library( name = "tensorrt_headers", hdrs = glob( include = ["include/Nv*.h"], ), strip_include_prefix = "include", # Allows including CUDA headers with angle brackets. # includes = ["cuda/include"], ) cc_library( name = "tensorrt_infer", srcs = ["libs/libnvinfer.so"], linkopts = ["-ldl"], ) """) tensorrt_configure = repository_rule( implementation = _impl, environ = ["TENSORRT_HDRS_PATH", "TENSORRT_LIBS_PATH"], ) ================================================ FILE: build.sh ================================================ #!/bin/bash # # Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of NVIDIA CORPORATION nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. 
# IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

mkdir -p build
cd build
cmake -DCMAKE_BUILD_TYPE=Release ..
cmake -DCMAKE_BUILD_TYPE=Release ..
make -j
cd /work/notebooks
ln -f -s /work/build/tensorrt-laboratory/python/trtlab/trtlab.cpython-35m-x86_64-linux-gnu.so
#make install


================================================
FILE: cmake/FindTensorRT.cmake
================================================
# This module defines the following variables:
#
# ::
#
#   TensorRT_INCLUDE_DIRS
#   TensorRT_LIBRARIES
#   TensorRT_FOUND
#
# ::
#
#   TensorRT_VERSION_STRING - version (x.y.z)
#   TensorRT_VERSION_MAJOR  - major version (x)
#   TensorRT_VERSION_MINOR  - minor version (y)
#   TensorRT_VERSION_PATCH  - patch version (z)
#
# Hints
# ^^^^^
# A user may set ``TensorRT_ROOT`` to an installation root to tell this module where to look.
#

set(_TensorRT_SEARCHES)

if(TensorRT_ROOT)
  set(_TensorRT_SEARCH_ROOT PATHS ${TensorRT_ROOT} NO_DEFAULT_PATH)
  list(APPEND _TensorRT_SEARCHES _TensorRT_SEARCH_ROOT)
endif()

# appends some common paths
set(_TensorRT_SEARCH_NORMAL
  PATHS "/usr"
)
list(APPEND _TensorRT_SEARCHES _TensorRT_SEARCH_NORMAL)

# Include dir
foreach(search ${_TensorRT_SEARCHES})
  find_path(TensorRT_INCLUDE_DIR NAMES NvInfer.h ${${search}} PATH_SUFFIXES include)
endforeach()

if(NOT TensorRT_LIBRARY)
  foreach(search ${_TensorRT_SEARCHES})
    find_library(TensorRT_LIBRARY NAMES nvinfer ${${search}} PATH_SUFFIXES lib)
  endforeach()
endif()

mark_as_advanced(TensorRT_INCLUDE_DIR)

if(TensorRT_INCLUDE_DIR AND EXISTS "${TensorRT_INCLUDE_DIR}/NvInfer.h")
  file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MAJOR REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$")
  file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MINOR REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$")
  file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInfer.h" TensorRT_PATCH REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$")

  string(REGEX REPLACE "^#define NV_TENSORRT_MAJOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MAJOR "${TensorRT_MAJOR}")
  string(REGEX REPLACE "^#define NV_TENSORRT_MINOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MINOR "${TensorRT_MINOR}")
  string(REGEX REPLACE "^#define NV_TENSORRT_PATCH ([0-9]+).*$" "\\1" TensorRT_VERSION_PATCH "${TensorRT_PATCH}")
  set(TensorRT_VERSION_STRING "${TensorRT_VERSION_MAJOR}.${TensorRT_VERSION_MINOR}.${TensorRT_VERSION_PATCH}")
endif()

include(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(TensorRT REQUIRED_VARS TensorRT_LIBRARY TensorRT_INCLUDE_DIR VERSION_VAR TensorRT_VERSION_STRING)

if(TensorRT_FOUND)
  set(TensorRT_INCLUDE_DIRS ${TensorRT_INCLUDE_DIR})

  if(NOT TensorRT_LIBRARIES)
    set(TensorRT_LIBRARIES ${TensorRT_LIBRARY})
  endif()

  if(NOT TARGET TensorRT::TensorRT)
    add_library(TensorRT::TensorRT UNKNOWN IMPORTED)
    set_target_properties(TensorRT::TensorRT PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${TensorRT_INCLUDE_DIRS}")
    set_property(TARGET TensorRT::TensorRT APPEND PROPERTY IMPORTED_LOCATION "${TensorRT_LIBRARY}")
  endif()
endif()
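
Editor's note: a minimal consumer sketch for the module above (not a file in this repository; the `trt_app` target and `main.cc` are placeholders). It relies only on the variables and the `TensorRT::TensorRT` imported target that `FindTensorRT.cmake` defines:

```
# Make the bundled find module visible, then locate TensorRT.
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")

# Optional hint for non-standard installs:
# set(TensorRT_ROOT "/opt/tensorrt")
find_package(TensorRT REQUIRED)
message(STATUS "Found TensorRT ${TensorRT_VERSION_STRING} in ${TensorRT_INCLUDE_DIRS}")

# The imported target carries the include directories and the libnvinfer location.
add_executable(trt_app main.cc)
target_link_libraries(trt_app PRIVATE TensorRT::TensorRT)
```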
================================================
FILE: cmake/Findcpuaff.cmake
================================================
# This module defines the following variables:
#
# ::
#
#   CPUAFF_INCLUDE_DIRS
#   CPUAFF_FOUND
#
# ::
#
# Hints
# ^^^^^
# A user may set ``CPUAFF_ROOT`` to an installation root to tell this module where to look.
#

set(CPUAFF_FOUND FALSE)
set(_CPUAFF_SEARCHES)

if(CPUAFF_ROOT)
  set(_CPUAFF_SEARCH_ROOT PATHS ${CPUAFF_ROOT} NO_DEFAULT_PATH)
  list(APPEND _CPUAFF_SEARCHES _CPUAFF_SEARCH_ROOT)
else()
  list(APPEND _CPUAFF_SEARCHES "/usr")
  list(APPEND _CPUAFF_SEARCHES "/usr/local")
endif()

# Include dir
foreach(search ${_CPUAFF_SEARCHES})
  find_path(
    CPUAFF_INCLUDE_DIR
    NAMES cpuaff/cpuaff.hpp
    PATHS ${CPUAFF_ROOT}
    PATH_SUFFIXES include)
  message(STATUS "cpuaff: ${CPUAFF_INCLUDE_DIR}")
endforeach()

mark_as_advanced(CPUAFF_INCLUDE_DIR)

if(CPUAFF_INCLUDE_DIR AND EXISTS "${CPUAFF_INCLUDE_DIR}/cpuaff/cpuaff.hpp")
  set(CPUAFF_FOUND True)
  add_library(cpuaff INTERFACE)
  target_include_directories(cpuaff INTERFACE ${CPUAFF_INCLUDE_DIR})
endif()


================================================
FILE: cmake/GRPCGenerateCPP.cmake
================================================
find_package(gRPC REQUIRED COMPONENTS grpc_cpp_plugin)
set(_gRPC_CPP_PLUGIN_EXECUTABLE $<TARGET_FILE:gRPC::grpc_cpp_plugin>)

function(PROTOBUF_GENERATE_GRPC_CPP SRCS HDRS)
  cmake_parse_arguments(protobuf "" "EXPORT_MACRO;DESCRIPTORS" "" ${ARGN})
  set(PROTO_FILES "${protobuf_UNPARSED_ARGUMENTS}")
  if(NOT PROTO_FILES)
    message(SEND_ERROR "Error: PROTOBUF_GENERATE_GRPC_CPP() called without any proto files")
    return()
  endif()

  if(PROTOBUF_GENERATE_CPP_APPEND_PATH)
    # This variable is common for all types of output.
    # Create an include path for each file specified
    foreach(FIL ${PROTO_FILES})
      get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
      get_filename_component(ABS_PATH ${ABS_FIL} PATH)
      list(FIND _protobuf_include_path ${ABS_PATH} _contains_already)
      if(${_contains_already} EQUAL -1)
        list(APPEND _protobuf_include_path -I ${ABS_PATH})
      endif()
    endforeach()
  else()
    set(_protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR})
  endif()

  if(DEFINED PROTOBUF_IMPORT_DIRS AND NOT DEFINED Protobuf_IMPORT_DIRS)
    set(Protobuf_IMPORT_DIRS "${PROTOBUF_IMPORT_DIRS}")
  endif()

  if(DEFINED Protobuf_IMPORT_DIRS)
    foreach(DIR ${Protobuf_IMPORT_DIRS})
      get_filename_component(ABS_PATH ${DIR} ABSOLUTE)
      list(FIND _protobuf_include_path ${ABS_PATH} _contains_already)
      if(${_contains_already} EQUAL -1)
        list(APPEND _protobuf_include_path -I ${ABS_PATH})
      endif()
    endforeach()
  endif()

  set(${SRCS})
  set(${HDRS})
  foreach(FIL ${PROTO_FILES})
    get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
    get_filename_component(FIL_WE ${FIL} NAME_WE)
    if(NOT PROTOBUF_GENERATE_CPP_APPEND_PATH)
      get_filename_component(FIL_DIR ${FIL} DIRECTORY)
      if(FIL_DIR)
        set(FIL_WE "${FIL_DIR}/${FIL_WE}")
      endif()
    endif()

    set(_protobuf_grpc_src "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.grpc.pb.cc")
    set(_protobuf_grpc_hdr "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.grpc.pb.h")
    list(APPEND ${SRCS} "${_protobuf_grpc_src}")
    list(APPEND ${HDRS} "${_protobuf_grpc_hdr}")

    add_custom_command(
      OUTPUT "${_protobuf_grpc_src}" "${_protobuf_grpc_hdr}"
      COMMAND ${Protobuf_PROTOC_EXECUTABLE}
              --grpc_out=${CMAKE_CURRENT_BINARY_DIR}
              --plugin=protoc-gen-grpc=${_gRPC_CPP_PLUGIN_EXECUTABLE}
              ${_protobuf_include_path} ${ABS_FIL}
      DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE}
      COMMENT "Running gRPC C++ protocol buffer compiler on ${FIL}"
      VERBATIM)
  endforeach()

  set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE)
  set(${SRCS} ${${SRCS}} PARENT_SCOPE)
  set(${HDRS} ${${HDRS}} PARENT_SCOPE)
endfunction()
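
Editor's note: the function above generates only the `*.grpc.pb.cc/.h` service stubs; the message classes still come from the stock `protobuf_generate_cpp()` provided by CMake's `FindProtobuf`. A hedged usage sketch (`echo.proto` and the target name are placeholders, not files in this module):

```
find_package(Protobuf REQUIRED)
include(GRPCGenerateCPP)

# Message code from the standard FindProtobuf helper ...
protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS echo.proto)
# ... and gRPC service stubs from the function defined above.
protobuf_generate_grpc_cpp(GRPC_SRCS GRPC_HDRS echo.proto)

add_library(echo_proto ${PROTO_SRCS} ${PROTO_HDRS} ${GRPC_SRCS} ${GRPC_HDRS})
target_include_directories(echo_proto PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
target_link_libraries(echo_proto PUBLIC protobuf::libprotobuf)
```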
================================================
FILE: cmake/GRPCGenerateCPPLikeBazel.cmake
================================================
find_program(GRPC_CPP_PLUGIN grpc_cpp_plugin) # Get full path to plugin

function(PROTOBUF_GENERATE_GRPC_CPP_LIKE_BAZEL SRCS HDRS)
  cmake_parse_arguments(protobuf "" "EXPORT_MACRO;DESCRIPTORS" "" ${ARGN})
  set(PROTO_FILES "${protobuf_UNPARSED_ARGUMENTS}")
  if(NOT PROTO_FILES)
    message(SEND_ERROR "Error: PROTOBUF_GENERATE_GRPC_CPP_LIKE_BAZEL() called without any proto files")
    return()
  endif()

  if(protobuf_EXPORT_MACRO)
    set(DLL_EXPORT_DECL "dllexport_decl=${protobuf_EXPORT_MACRO}:")
  endif()

  get_filename_component(ABS_PROTO_PATH ${CMAKE_SOURCE_DIR} ABSOLUTE)
  set(EXTRA_ARGS "--proto_path=${ABS_PROTO_PATH}")
  file(RELATIVE_PATH Protobuf_PRE_IMPORT_DIRS ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR})

  if(PROTOBUF_GENERATE_CPP_APPEND_PATH)
    # This variable is common for all types of output.
    # Create an include path for each file specified
    foreach(FIL ${PROTO_FILES})
      get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
      get_filename_component(ABS_PATH ${ABS_FIL} PATH)
      list(FIND _protobuf_include_path ${ABS_PATH} _contains_already)
      if(${_contains_already} EQUAL -1)
        list(APPEND _protobuf_include_path -I ${ABS_PATH})
      endif()
    endforeach()
  else()
    set(_protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR})
  endif()

  if(DEFINED PROTOBUF_IMPORT_DIRS AND NOT DEFINED Protobuf_IMPORT_DIRS)
    set(Protobuf_IMPORT_DIRS "${PROTOBUF_IMPORT_DIRS}")
  endif()

  if(DEFINED Protobuf_IMPORT_DIRS)
    foreach(DIR ${Protobuf_IMPORT_DIRS})
      get_filename_component(ABS_PATH ${DIR} ABSOLUTE)
      list(FIND _protobuf_include_path ${ABS_PATH} _contains_already)
      if(${_contains_already} EQUAL -1)
        list(APPEND _protobuf_include_path -I ${ABS_PATH})
      endif()
    endforeach()
  endif()

  set(${SRCS})
  set(${HDRS})
  foreach(FIL ${PROTO_FILES})
    message(STATUS "grpc_cpp_proto: ${FIL}")
    get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
    get_filename_component(FIL_WE ${FIL} NAME_WE)
    message(STATUS "grpc_cpp_proto_abs: ${ABS_FIL}")
    if(NOT PROTOBUF_GENERATE_CPP_APPEND_PATH)
      get_filename_component(FIL_DIR ${FIL} DIRECTORY)
      if(FIL_DIR)
        set(FIL_WE "${FIL_DIR}/${FIL_WE}")
      endif()
    endif()

    if(Protobuf_PRE_IMPORT_DIRS)
      set(_protobuf_protoc_src "${CMAKE_CURRENT_BINARY_DIR}/${Protobuf_PRE_IMPORT_DIRS}/${FIL_WE}.grpc.pb.cc")
      set(_protobuf_protoc_hdr "${CMAKE_CURRENT_BINARY_DIR}/${Protobuf_PRE_IMPORT_DIRS}/${FIL_WE}.grpc.pb.h")
    else()
      set(_protobuf_protoc_src "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.grpc.pb.cc")
      set(_protobuf_protoc_hdr "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.grpc.pb.h")
    endif()
    message(STATUS "grpc_cpp_src: ${_protobuf_protoc_src}")
    list(APPEND ${SRCS} "${_protobuf_protoc_src}")
    list(APPEND ${HDRS} "${_protobuf_protoc_hdr}")

    add_custom_command(
      OUTPUT "${_protobuf_protoc_src}" "${_protobuf_protoc_hdr}"
      COMMAND ${Protobuf_PROTOC_EXECUTABLE}
              ${EXTRA_ARGS}
              "--grpc_out=${CMAKE_CURRENT_BINARY_DIR}"
              "--plugin=protoc-gen-grpc=${GRPC_CPP_PLUGIN}"
              ${_protobuf_protoc_flags}
              ${_protobuf_include_path} ${ABS_FIL}
      DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE}
      COMMENT "Running gRPC C++ protocol buffer compiler on ${FIL}"
      VERBATIM)
  endforeach()

  set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE)
  set(${SRCS} ${${SRCS}} PARENT_SCOPE)
  set(${HDRS} ${${HDRS}} PARENT_SCOPE)
endfunction()
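
Editor's note: the "like Bazel" variant pins `--proto_path` to `${CMAKE_SOURCE_DIR}`, so generated files land in a mirror of the source tree under the current binary directory and includes can be written relative to the repository root, as Bazel would lay them out. A sketch under those assumptions (`echo.proto` and the target name are placeholders):

```
include(GRPCGenerateCPPLikeBazel)

protobuf_generate_grpc_cpp_like_bazel(GRPC_SRCS GRPC_HDRS echo.proto)

add_library(echo_grpc ${GRPC_SRCS} ${GRPC_HDRS})
# Headers are emitted under a source-tree mirror, e.g.
# ${CMAKE_CURRENT_BINARY_DIR}/<dir-relative-to-repo-root>/echo.grpc.pb.h
target_include_directories(echo_grpc PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
```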
send your improvements as pull requests on Github. # Find another package and make it a dependency of the current package. # This also automatically forwards the "REQUIRED" argument. # Usage: libfind_package(<PREFIX> <another package> [extra args to find_package]) macro (libfind_package PREFIX PKG) set(${PREFIX}_args ${PKG} ${ARGN}) if (${PREFIX}_FIND_REQUIRED) set(${PREFIX}_args ${${PREFIX}_args} REQUIRED) endif() find_package(${${PREFIX}_args}) set(${PREFIX}_DEPENDENCIES ${${PREFIX}_DEPENDENCIES};${PKG}) unset(${PREFIX}_args) endmacro() # A simple wrapper to make pkg-config searches a bit easier. # Works the same as CMake's internal pkg_check_modules but is always quiet. macro (libfind_pkg_check_modules) find_package(PkgConfig QUIET) if (PKG_CONFIG_FOUND) pkg_check_modules(${ARGN} QUIET) endif() endmacro() # Avoid useless copy&pasta by doing what most simple libraries do anyway: # pkg-config, find headers, find library. # Usage: libfind_pkg_detect(<PREFIX> <pkg-config args> FIND_PATH <name> [other args] FIND_LIBRARY <name> [other args]) # E.g. libfind_pkg_detect(SDL2 sdl2 FIND_PATH SDL.h PATH_SUFFIXES SDL2 FIND_LIBRARY SDL2) function (libfind_pkg_detect PREFIX) # Parse arguments set(argname pkgargs) foreach (i ${ARGN}) if ("${i}" STREQUAL "FIND_PATH") set(argname pathargs) elseif ("${i}" STREQUAL "FIND_LIBRARY") set(argname libraryargs) else() set(${argname} ${${argname}} ${i}) endif() endforeach() if (NOT pkgargs) message(FATAL_ERROR "libfind_pkg_detect requires at least a pkg_config package name to be passed.") endif() # Find library libfind_pkg_check_modules(${PREFIX}_PKGCONF ${pkgargs}) if (pathargs) find_path(${PREFIX}_INCLUDE_DIR NAMES ${pathargs} HINTS ${${PREFIX}_PKGCONF_INCLUDE_DIRS}) endif() if (libraryargs) find_library(${PREFIX}_LIBRARY NAMES ${libraryargs} HINTS ${${PREFIX}_PKGCONF_LIBRARY_DIRS}) endif() endfunction() # Extracts a version #define from a version.h file, output stored to <PREFIX>_VERSION. # Usage: libfind_version_header(Foobar foobar/version.h FOOBAR_VERSION_STR) # Fourth argument "QUIET" may be used for silently testing different define names. # This function does nothing if the version variable is already defined. function (libfind_version_header PREFIX VERSION_H DEFINE_NAME) # Skip processing if we already have a version or if the include dir was not found if (${PREFIX}_VERSION OR NOT ${PREFIX}_INCLUDE_DIR) return() endif() set(quiet ${${PREFIX}_FIND_QUIETLY}) # Process optional arguments foreach(arg ${ARGN}) if (arg STREQUAL "QUIET") set(quiet TRUE) else() message(AUTHOR_WARNING "Unknown argument ${arg} to libfind_version_header ignored.") endif() endforeach() # Read the header and parse for version number set(filename "${${PREFIX}_INCLUDE_DIR}/${VERSION_H}") if (NOT EXISTS ${filename}) if (NOT quiet) message(AUTHOR_WARNING "Unable to find ${${PREFIX}_INCLUDE_DIR}/${VERSION_H}") endif() return() endif() file(READ "${filename}" header) string(REGEX REPLACE ".*#[ \t]*define[ \t]*${DEFINE_NAME}[ \t]*\"([^\n]*)\".*" "\\1" match "${header}") # No regex match? if (match STREQUAL header) if (NOT quiet) message(AUTHOR_WARNING "Unable to find \#define ${DEFINE_NAME} \"<version>\" from ${${PREFIX}_INCLUDE_DIR}/${VERSION_H}") endif() return() endif() # Export the version string set(${PREFIX}_VERSION "${match}" PARENT_SCOPE) endfunction() # Do the final processing once the paths have been detected. # If include dirs are needed, ${PREFIX}_PROCESS_INCLUDES should be set to contain # all the variables, each of which contain one include directory. # Ditto for ${PREFIX}_PROCESS_LIBS and library files.
# Will set ${PREFIX}_FOUND, ${PREFIX}_INCLUDE_DIRS and ${PREFIX}_LIBRARIES. # Also handles errors in case library detection was required, etc. function (libfind_process PREFIX) # Skip processing if already processed during this configuration run if (${PREFIX}_FOUND) return() endif() set(found TRUE) # Start with the assumption that the package was found # Did we find any files? Did we miss includes? These are for formatting better error messages. set(some_files FALSE) set(missing_headers FALSE) # Shorthands for some variables that we need often set(quiet ${${PREFIX}_FIND_QUIETLY}) set(required ${${PREFIX}_FIND_REQUIRED}) set(exactver ${${PREFIX}_FIND_VERSION_EXACT}) set(findver "${${PREFIX}_FIND_VERSION}") set(version "${${PREFIX}_VERSION}") # Lists of config option names (all, includes, libs) unset(configopts) set(includeopts ${${PREFIX}_PROCESS_INCLUDES}) set(libraryopts ${${PREFIX}_PROCESS_LIBS}) # Process deps to add to foreach (i ${PREFIX} ${${PREFIX}_DEPENDENCIES}) if (DEFINED ${i}_INCLUDE_OPTS OR DEFINED ${i}_LIBRARY_OPTS) # The package seems to export option lists that we can use, woohoo! list(APPEND includeopts ${${i}_INCLUDE_OPTS}) list(APPEND libraryopts ${${i}_LIBRARY_OPTS}) else() # If plural forms don't exist or they equal singular forms if ((NOT DEFINED ${i}_INCLUDE_DIRS AND NOT DEFINED ${i}_LIBRARIES) OR (${i}_INCLUDE_DIR STREQUAL ${i}_INCLUDE_DIRS AND ${i}_LIBRARY STREQUAL ${i}_LIBRARIES)) # Singular forms can be used if (DEFINED ${i}_INCLUDE_DIR) list(APPEND includeopts ${i}_INCLUDE_DIR) endif() if (DEFINED ${i}_LIBRARY) list(APPEND libraryopts ${i}_LIBRARY) endif() else() # Oh no, we don't know the option names message(FATAL_ERROR "We couldn't determine config variable names for ${i} includes and libs. Aieeh!") endif() endif() endforeach() if (includeopts) list(REMOVE_DUPLICATES includeopts) endif() if (libraryopts) list(REMOVE_DUPLICATES libraryopts) endif() string(REGEX REPLACE ".*[ ;]([^ ;]*(_INCLUDE_DIRS|_LIBRARIES))" "\\1" tmp "${includeopts} ${libraryopts}") if (NOT tmp STREQUAL "${includeopts} ${libraryopts}") message(AUTHOR_WARNING "Plural form ${tmp} found in config options of ${PREFIX}. This works as before but is now deprecated. Please only use singular forms INCLUDE_DIR and LIBRARY, and update your find scripts for LibFindMacros > 2.0 automatic dependency system (most often you can simply remove the PROCESS variables entirely).") endif() # Include/library names separated by spaces (notice: not CMake lists) unset(includes) unset(libs) # Process all includes and set found false if any are missing foreach (i ${includeopts}) list(APPEND configopts ${i}) if (NOT "${${i}}" STREQUAL "${i}-NOTFOUND") list(APPEND includes "${${i}}") else() set(found FALSE) set(missing_headers TRUE) endif() endforeach() # Process all libraries and set found false if any are missing foreach (i ${libraryopts}) list(APPEND configopts ${i}) if (NOT "${${i}}" STREQUAL "${i}-NOTFOUND") list(APPEND libs "${${i}}") else() set (found FALSE) endif() endforeach() # Version checks if (found AND findver) if (NOT version) message(WARNING "The find module for ${PREFIX} does not provide version information, so we'll just assume that it is OK.
Please fix the module or remove package version requirements to get rid of this warning.") elseif (version VERSION_LESS findver OR (exactver AND NOT version VERSION_EQUAL findver)) set(found FALSE) set(version_unsuitable TRUE) endif() endif() # If all-OK, hide all config options, export variables, print status and exit if (found) foreach (i ${configopts}) mark_as_advanced(${i}) endforeach() if (NOT quiet) message(STATUS "Found ${PREFIX} ${${PREFIX}_VERSION}") if (LIBFIND_DEBUG) message(STATUS " ${PREFIX}_DEPENDENCIES=${${PREFIX}_DEPENDENCIES}") message(STATUS " ${PREFIX}_INCLUDE_OPTS=${includeopts}") message(STATUS " ${PREFIX}_INCLUDE_DIRS=${includes}") message(STATUS " ${PREFIX}_LIBRARY_OPTS=${libraryopts}") message(STATUS " ${PREFIX}_LIBRARIES=${libs}") endif() set (${PREFIX}_INCLUDE_OPTS ${includeopts} PARENT_SCOPE) set (${PREFIX}_LIBRARY_OPTS ${libraryopts} PARENT_SCOPE) set (${PREFIX}_INCLUDE_DIRS ${includes} PARENT_SCOPE) set (${PREFIX}_LIBRARIES ${libs} PARENT_SCOPE) set (${PREFIX}_FOUND TRUE PARENT_SCOPE) endif() return() endif() # Format messages for debug info and the type of error set(vars "Relevant CMake configuration variables:\n") foreach (i ${configopts}) mark_as_advanced(CLEAR ${i}) set(val ${${i}}) if ("${val}" STREQUAL "${i}-NOTFOUND") set (val "<not found>") elseif (val AND NOT EXISTS ${val}) set (val "${val} (does not exist)") else() set(some_files TRUE) endif() set(vars "${vars} ${i}=${val}\n") endforeach() set(vars "${vars}You may use CMake GUI, cmake -D or ccmake to modify the values. Delete CMakeCache.txt to discard all values and force full re-detection if necessary.\n") if (version_unsuitable) set(msg "${PREFIX} ${${PREFIX}_VERSION} was found but") if (exactver) set(msg "${msg} only version ${findver} is acceptable.") else() set(msg "${msg} version ${findver} is the minimum requirement.") endif() else() if (missing_headers) set(msg "We could not find development headers for ${PREFIX}. Do you have the necessary dev package installed?") elseif (some_files) set(msg "We only found some files of ${PREFIX}, not all of them. Perhaps your installation is incomplete or maybe we just didn't look in the right place?") if(findver) set(msg "${msg} This could also be caused by incompatible version (if it helps, at least ${PREFIX} ${findver} should work).") endif() else() set(msg "We were unable to find package ${PREFIX}.") endif() endif() # Fatal error out if REQUIRED if (required) set(msg "REQUIRED PACKAGE NOT FOUND\n${msg} This package is REQUIRED and you need to install it or adjust CMake configuration in order to continue building ${CMAKE_PROJECT_NAME}.") message(FATAL_ERROR "${msg}\n${vars}") endif() # Otherwise just print a nasty warning if (NOT quiet) message(WARNING "WARNING: MISSING PACKAGE\n${msg} This package is NOT REQUIRED and you may ignore this warning but by doing so you may miss some functionality of ${CMAKE_PROJECT_NAME}.
\n${vars}") endif() endfunction() ================================================ FILE: cmake/ProtobufGenerateCPPLikeBazel.cmake ================================================ function(PROTOBUF_GENERATE_CPP_LIKE_BAZEL SRCS HDRS) cmake_parse_arguments(protobuf "" "EXPORT_MACRO;DESCRIPTORS" "" ${ARGN}) set(PROTO_FILES "${protobuf_UNPARSED_ARGUMENTS}") if(NOT PROTO_FILES) message(SEND_ERROR "Error: PROTOBUF_GENERATE_CPP() called without any proto files") return() endif() if(protobuf_EXPORT_MACRO) set(DLL_EXPORT_DECL "dllexport_decl=${protobuf_EXPORT_MACRO}:") endif() get_filename_component(ABS_PROTO_PATH ${CMAKE_SOURCE_DIR} ABSOLUTE) set(EXTRA_ARGS "--proto_path=${ABS_PROTO_PATH}") file(RELATIVE_PATH Protobuf_PRE_IMPORT_DIRS ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) if(PROTOBUF_GENERATE_CPP_APPEND_PATH) # Create an include path for each file specified foreach(FIL ${PROTO_FILES}) get_filename_component(ABS_FIL ${FIL} ABSOLUTE) get_filename_component(ABS_PATH ${ABS_FIL} PATH) list(FIND _protobuf_include_path ${ABS_PATH} _contains_already) if(${_contains_already} EQUAL -1) list(APPEND _protobuf_include_path -I ${ABS_PATH}) endif() endforeach() else() set(_protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR}) endif() if(DEFINED PROTOBUF_IMPORT_DIRS AND NOT DEFINED Protobuf_IMPORT_DIRS) set(Protobuf_IMPORT_DIRS "${PROTOBUF_IMPORT_DIRS}") endif() if(DEFINED Protobuf_IMPORT_DIRS) foreach(DIR ${Protobuf_IMPORT_DIRS}) get_filename_component(ABS_PATH ${DIR} ABSOLUTE) list(FIND _protobuf_include_path ${ABS_PATH} _contains_already) if(${_contains_already} EQUAL -1) list(APPEND _protobuf_include_path -I ${ABS_PATH}) endif() endforeach() endif() set(${SRCS}) set(${HDRS}) if (protobuf_DESCRIPTORS) set(${protobuf_DESCRIPTORS}) endif() foreach(FIL ${PROTO_FILES}) get_filename_component(ABS_FIL ${FIL} ABSOLUTE) get_filename_component(FIL_WE ${FIL} NAME_WE) if(NOT PROTOBUF_GENERATE_CPP_APPEND_PATH) get_filename_component(FIL_DIR ${FIL} DIRECTORY) if(FIL_DIR) set(FIL_WE "${FIL_DIR}/${FIL_WE}") endif() endif() if(Protobuf_PRE_IMPORT_DIRS) set(_protobuf_protoc_src "${CMAKE_CURRENT_BINARY_DIR}/${Protobuf_PRE_IMPORT_DIRS}/${FIL_WE}.pb.cc") set(_protobuf_protoc_hdr "${CMAKE_CURRENT_BINARY_DIR}/${Protobuf_PRE_IMPORT_DIRS}/${FIL_WE}.pb.h") else() set(_protobuf_protoc_src "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc") set(_protobuf_protoc_hdr "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h") endif() list(APPEND ${SRCS} "${_protobuf_protoc_src}") list(APPEND ${HDRS} "${_protobuf_protoc_hdr}") if(protobuf_DESCRIPTORS) set(_protobuf_protoc_desc "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.desc") set(_protobuf_protoc_flags "--descriptor_set_out=${_protobuf_protoc_desc}") list(APPEND ${protobuf_DESCRIPTORS} "${_protobuf_protoc_desc}") else() set(_protobuf_protoc_desc "") set(_protobuf_protoc_flags "") endif() add_custom_command( OUTPUT "${_protobuf_protoc_src}" "${_protobuf_protoc_hdr}" ${_protobuf_protoc_desc} COMMAND protobuf::protoc ${EXTRA_ARGS} "--cpp_out=${DLL_EXPORT_DECL}${CMAKE_CURRENT_BINARY_DIR}" ${_protobuf_protoc_flags} ${_protobuf_include_path} ${ABS_FIL} DEPENDS ${ABS_FIL} protobuf::protoc COMMENT "Running C++ protocol buffer compiler on ${FIL}" VERBATIM ) endforeach() set(${SRCS} "${${SRCS}}" PARENT_SCOPE) set(${HDRS} "${${HDRS}}" PARENT_SCOPE) if(protobuf_DESCRIPTORS) set(${protobuf_DESCRIPTORS} "${${protobuf_DESCRIPTORS}}" PARENT_SCOPE) endif() endfunction() ================================================ FILE: cmake/dependencies.cmake ================================================ include 
(ExternalProject) set (DEPENDENCIES) set (EXTRA_CMAKE_ARGS) # trtlab external dependencies list (APPEND DEPENDENCIES boost dlpack gflags glog benchmark googletest cpuaff jemalloc) list (APPEND DEPENDENCIES grpc-repo protobuf c-ares grpc cub cnpy) # note on ubuntu 18.04, you need # apt install libz-dev libssl-dev # customize the folder for external projects # download, source and builds for dependencies # will be in /Dependencies set_property (DIRECTORY PROPERTY EP_BASE Dependencies) # all dependencies will be installed here # typical directories: bin, include and lib set (BUILD_ROOT ${CMAKE_CURRENT_BINARY_DIR}/Dependencies/Build) set (SOURCE_ROOT ${CMAKE_CURRENT_BINARY_DIR}/Dependencies/Source) set (INSTALL_ROOT ${CMAKE_CURRENT_BINARY_DIR}/local) # set cmake search paths to pick up installed .cmake files list(INSERT CMAKE_MODULE_PATH 0 "${INSTALL_ROOT}/lib/cmake") list(INSERT CMAKE_PREFIX_PATH 0 "${INSTALL_ROOT}/lib/cmake") # cmake config args forwarded to trtlab list(APPEND EXTRA_CMAKE_ARGS -DCMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH} -DCMAKE_MODULE_PATH=${CMAKE_MODULE_PATH} # -DBoost_VERBOSE=ON -DBoost_USE_STATIC_LIBS=ON -DCPUAFF_ROOT=${INSTALL_ROOT} -DJEMALLOC_STATIC_LIBRARIES=${INSTALL_ROOT}/lib/libjemalloc_pic.a -DCUB_INCLUDE_DIR=${SOURCE_ROOT}/cub -DINSTALL_ROOT=${INSTALL_ROOT} ) # short-cut to dependencies build path set (BUILD_ROOT ${CMAKE_CURRENT_BINARY_DIR}/Dependencies/Build) # Boost # ===== # - Use static linking to avoid issues with system-wide installations of Boost. # - Use numa=on to ensure the numa component of fiber gets built set(BOOST_COMPONENTS "context,fiber,filesystem") ExternalProject_Add (boost URL https://dl.bintray.com/boostorg/release/1.72.0/source/boost_1_72_0.tar.gz URL_HASH SHA256=c66e88d5786f2ca4dbebb14e06b566fb642a1a6947ad8cc9091f9f445134143f CONFIGURE_COMMAND ./bootstrap.sh --prefix=${INSTALL_ROOT} --with-libraries=${BOOST_COMPONENTS} numa=on BUILD_COMMAND ./b2 link=static cxxflags=-fPIC cflags=-fPIC cxxflags="-std=c++14" numa=on --build-dir=${BUILD_ROOT}/boost --stagedir=${BUILD_ROOT}/boost BUILD_IN_SOURCE 1 INSTALL_COMMAND ./b2 install numa=on ) # DLPack # ====== ExternalProject_Add(dlpack GIT_REPOSITORY "https://github.com/dmlc/dlpack.git" GIT_TAG "master" CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${INSTALL_ROOT} ) # gflags # ====== # config, build and install to INSTALL_ROOT ExternalProject_Add(gflags GIT_REPOSITORY "https://github.com/gflags/gflags.git" GIT_TAG "v2.2.2" CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${INSTALL_ROOT} -DBUILD_SHARED_LIBS=ON -DBUILD_STATIC_LIBS=ON -DBUILD_PACKAGING=OFF -DBUILD_TESTING=OFF -DBUILD_CONFIG_TESTS=OFF -DINSTALL_HEADERS=ON -DBUILD_gflags_LIB=OFF -DBUILD_gflags_nothreads_LIB=ON -DGFLAGS_NAMESPACE=google ) # glog # ==== # - link against shared # - todo: compile with -DWITH_GFLAGS=OFF and remove gflags dependency ExternalProject_Add(glog DEPENDS gflags GIT_REPOSITORY "https://github.com/google/glog" GIT_TAG "v0.4.0" CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${INSTALL_ROOT} -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=OFF ) # google benchmark # ================ ExternalProject_Add(benchmark DEPENDS GIT_REPOSITORY https://github.com/google/benchmark.git GIT_TAG "v1.5.0" BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/Dependencies/Build/benchmark" CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${INSTALL_ROOT} -DCMAKE_BUILD_TYPE=Release -DBENCHMARK_ENABLE_TESTING=OFF ) # google test # =========== ExternalProject_Add(googletest DEPENDS glog gflags GIT_REPOSITORY https://github.com/google/googletest.git GIT_TAG "release-1.10.0" BINARY_DIR 
"${CMAKE_CURRENT_BINARY_DIR}/Dependencies/Build/googletest" CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${INSTALL_ROOT} -DCMAKE_BUILD_TYPE=Release ) # cpuaff # ====== ExternalProject_Add(cpuaff URL http://dcdillon.github.io/cpuaff/releases/cpuaff-1.0.6.tar.gz CONFIGURE_COMMAND ./configure --prefix=${INSTALL_ROOT} BUILD_COMMAND make include INSTALL_COMMAND make install include BUILD_IN_SOURCE 1 ) # nvidia cub # ========== ExternalProject_Add(cub GIT_REPOSITORY https://github.com/NVlabs/cub.git GIT_TAG "1.8.0" CONFIGURE_COMMAND "" BUILD_COMMAND "" INSTALL_COMMAND "" TEST_COMMAND "" ) # jemalloc # ======== ExternalProject_Add(jemalloc URL https://github.com/jemalloc/jemalloc/releases/download/5.2.1/jemalloc-5.2.1.tar.bz2 CONFIGURE_COMMAND ./configure --prefix=${INSTALL_ROOT} BUILD_COMMAND make include INSTALL_COMMAND make install include BUILD_IN_SOURCE 1 ) # cnpy - c++ library for reading and writing .npy/.npz files # ========================================================== ExternalProject_Add(cnpy GIT_REPOSITORY "https://github.com/rogersce/cnpy.git" GIT_TAG "master" CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${INSTALL_ROOT} -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODEL=ON ) # grpc-repo # ========= ExternalProject_Add(grpc-repo GIT_REPOSITORY "https://github.com/grpc/grpc.git" GIT_TAG "v1.32.0" GIT_SUBMODULES "third_party/cares/cares" "third_party/protobuf" "third_party/abseil-cpp" "third_party/re2" CONFIGURE_COMMAND "" BUILD_COMMAND "" INSTALL_COMMAND "" TEST_COMMAND "" ) # # Build protobuf project from grpc-repo # ExternalProject_Add(absl SOURCE_DIR "${SOURCE_ROOT}/grpc-repo/third_party/abseil-cpp" DOWNLOAD_COMMAND "" CMAKE_CACHE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=TRUE -DCMAKE_INSTALL_PREFIX:PATH=${INSTALL_ROOT} DEPENDS grpc-repo ) ExternalProject_Add(re2 SOURCE_DIR "${SOURCE_ROOT}/grpc-repo/third_party/re2" DOWNLOAD_COMMAND "" CMAKE_CACHE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=TRUE -DCMAKE_INSTALL_PREFIX:PATH=${INSTALL_ROOT} DEPENDS grpc-repo ) ExternalProject_Add(protobuf SOURCE_DIR "${SOURCE_ROOT}/grpc-repo/third_party/protobuf/cmake" DOWNLOAD_COMMAND "" CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -Dprotobuf_BUILD_TESTS:BOOL=OFF -Dprotobuf_WITH_ZLIB:BOOL=OFF -Dprotobuf_MSVC_STATIC_RUNTIME:BOOL=OFF -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX:PATH=${INSTALL_ROOT} DEPENDS grpc-repo ) # Location where protobuf-config.cmake will be installed varies by # platform if (WIN32) set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${CMAKE_CURRENT_BINARY_DIR}/protobuf/cmake") else() set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${INSTALL_ROOT}/lib/cmake") endif() # # Build c-area project from grpc-repo # ExternalProject_Add(c-ares SOURCE_DIR "${SOURCE_ROOT}/grpc-repo/third_party/cares/cares" DOWNLOAD_COMMAND "" CMAKE_ARGS -DCARES_SHARED:BOOL=OFF -DCARES_STATIC:BOOL=ON -DCARES_STATIC_PIC:BOOL=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX:PATH=${INSTALL_ROOT} DEPENDS grpc-repo ) # # Build GRPC # ExternalProject_Add(grpc SOURCE_DIR "${SOURCE_ROOT}/grpc-repo" DOWNLOAD_COMMAND "" CMAKE_ARGS -DgRPC_INSTALL:BOOL=ON -DgRPC_BUILD_TESTS:BOOL=OFF -DgRPC_PROTOBUF_PROVIDER:STRING=package -DgRPC_PROTOBUF_PACKAGE_TYPE:STRING=CONFIG -DProtobuf_DIR:PATH=${INSTALL_ROOT}/lib/cmake -DgRPC_ZLIB_PROVIDER:STRING=package -DgRPC_CARES_PROVIDER:STRING=package -Dc-ares_DIR:PATH=${INSTALL_ROOT}/lib/cmake -DgRPC_SSL_PROVIDER:STRING=package -DgRPC_GFLAGS_PROVIDER=package -DgRPC_BENCHMARK_PROVIDER=package -DgRPC_RE2_PROVIDER:STRING=package -Dre2_DIR:STRING=${INSTALL_ROOT}/lib/cmake 
-DgRPC_ABSL_PROVIDER:STRING=package -Dabsl_DIR:STRING=${INSTALL_ROOT}/lib/cmake ${_CMAKE_ARGS_OPENSSL_ROOT_DIR} -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX:PATH=${INSTALL_ROOT} DEPENDS grpc-repo c-ares protobuf re2 absl gflags benchmark ) # trtlab # ====== ExternalProject_Add (trtlab DEPENDS ${DEPENDENCIES} SOURCE_DIR ${PROJECT_SOURCE_DIR} CMAKE_ARGS -DBUILD_DEPENDENCIES=OFF ${EXTRA_CMAKE_ARGS} INSTALL_COMMAND "" BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) ================================================ FILE: devel.sh ================================================ #!/bin/bash # # Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of NVIDIA CORPORATION nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # models_path=${TRT_MODELS_PATH:-"/path/to/my/models"} models_cli="" if [ -d "$models_path" ]; then models_cli=" -v $(realpath $models_path):/work/models " fi crt="" if [ -x "$(which luda)" ] ; then echo "Using luda" crt="$(which luda) --no-home" elif [ -x "$(which nvidia-docker)" ]; then echo "Using nvidia-docker" crt="nvidia-docker run --rm -ti" else echo "No GPU container runtime found" exit 911 fi NV_GPU=0 $crt -v $PWD:/work $models_cli --workdir /work --name trtlab --net host trtlab ================================================ FILE: examples/00_TensorRT/CMakeLists.txt ================================================ # Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of NVIDIA CORPORATION nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. include_directories(${CUDA_INCLUDE_DIRS}) include_directories(${TensorRT_INCLUDE_DIRS}) add_executable(inference.x inference.cc ${PROTO_SRCS} ${PROTO_GRPC_SRCS}) target_link_libraries(inference.x trtlab::nvrpc trtlab::tensorrt gflags ) add_executable(infer.x infer.cc ${PROTO_SRCS} ${PROTO_GRPC_SRCS}) target_link_libraries(infer.x trtlab::nvrpc trtlab::tensorrt gflags ) if(YAIS_ENABLE_MPI) find_package(MPI) include_directories(SYSTEM ${MPI_INCLUDE_PATH}) target_link_libraries(inference.x ${MPI_C_LIBRARIES} ${MPI_CXX_LIBRARIES} ) target_compile_definitions(inference.x PUBLIC PLAYGROUND_USE_MPI) endif() ================================================ FILE: examples/00_TensorRT/README.md ================================================ # Inference Example Basic CLI tool for executing TensorRT engines. Provide an engine and `inference.x` will run a simplified inference pipeline using synthetic data. The program will run a pipelined H2D -> TensorRT -> D2H calculation for `--seconds` (default: 5) with a 0.1 second warmup run. By default, only 1 TensorRT Execution Context is used to perform the evaluation. You can modify the number of contexts using the `--contexts` option. Unless provided, the number of Input/Output Buffers is set to `(2 * contexts)`. See below for the list of [options](#options). The `inference.x` program is fully pipelined and asynchronous. It uses three threads (default) to: 1) async copy the input H2D, 2) launch the async inference evaluation and return the output tensors to the host, and 3) wait on the resources used during execution and release them when finished. This final thread is where one might build a return message or do something else with the results. While running `inference.x`, you may find it useful to monitor GPU metrics using: ``` nvidia-smi dmon -i 0 -s put ``` Note: If you see numbers that differ from the output of `giexec`, you may have an IO bottleneck in that the transfers are more expensive than the compute. * TODO: Update the program to output avg xfer time.
* TODO: Build .engine files as part of the build ## Quickstart ``` root@dgx:/work/build/examples/00_TensorRT# ./inference.x --engine=/work/models/ResNet-50-b1-int8.engine I0702 22:16:51.868419 10857 TensorRT.cc:561] -- Initialzing TensorRT Resource Manager -- I0702 22:16:51.868676 10857 TensorRT.cc:562] Maximum Execution Concurrency: 1 I0702 22:16:51.868686 10857 TensorRT.cc:563] Maximum Copy Concurrency: 2 I0702 22:16:53.430330 10857 TensorRT.cc:628] -- Registering Model: 0 -- I0702 22:16:53.430399 10857 TensorRT.cc:629] Input/Output Tensors require 591.9 KiB I0702 22:16:53.430415 10857 TensorRT.cc:630] Execution Activations require 2.5 MiB I0702 22:16:53.430428 10857 TensorRT.cc:633] Weights require 30.7 MiB I0702 22:16:53.437571 10857 TensorRT.cc:652] -- Allocating TensorRT Resources -- I0702 22:16:53.437587 10857 TensorRT.cc:653] Creating 1 TensorRT execution tokens. I0702 22:16:53.437595 10857 TensorRT.cc:654] Creating a Pool of 2 Host/Device Memory Stacks I0702 22:16:53.437607 10857 TensorRT.cc:655] Each Host Stack contains 608.0 KiB I0702 22:16:53.437614 10857 TensorRT.cc:656] Each Device Stack contains 3.2 MiB I0702 22:16:53.437623 10857 TensorRT.cc:657] Total GPU Memory: 6.5 MiB I0702 22:16:53.540400 10857 inference.cc:93] -- Inference: Running for ~5 seconds with batch_size 1 -- I0702 22:16:58.543475 10857 inference.cc:131] Inference Results: 4770 batches in 5.00307 seconds; sec/batch: 0.00104886; inf/sec: 953.414 ``` ## Options ``` -buffers (Number of Buffers (default: 2x contexts)) type: int32 default: 0 -contexts (Number of Execution Contexts) type: int32 default: 1 -cudathreads (Number Cuda Launcher Threads) type: int32 default: 1 -engine (TensorRT serialized engine) type: string default: "/work/models/trt4.engine" -respthreads (Number Response Sync Threads) type: int32 default: 1 -seconds (Number of Execution Contexts) type: int32 default: 5 ``` ================================================ FILE: examples/00_TensorRT/infer.cc ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include "tensorrt/laboratory/core/thread_pool.h" #include "tensorrt/laboratory/infer_bench.h" #include "tensorrt/laboratory/inference_manager.h" #include "tensorrt/laboratory/model.h" #include "tensorrt/laboratory/runtime.h" #ifdef PLAYGROUND_USE_MPI #include "mpi.h" #define MPI_CHECK(mpicall) mpicall #else #define MPI_CHECK(mpicall) #endif using trtlab::ThreadPool; using trtlab::TensorRT::InferBench; using trtlab::TensorRT::InferBenchKey; using trtlab::TensorRT::InferenceManager; using trtlab::TensorRT::ManagedRuntime; using trtlab::TensorRT::Model; using trtlab::TensorRT::Runtime; using trtlab::TensorRT::StandardRuntime; static std::string ModelName(int model_id) { std::ostringstream stream; stream << model_id; return stream.str(); } static bool ValidateEngine(const char* flagname, const std::string& value) { struct stat buffer; return (stat(value.c_str(), &buffer) == 0); } DEFINE_string(engine, "/path/to/tensorrt.engine", "TensorRT serialized engine"); DEFINE_validator(engine, &ValidateEngine); DEFINE_string(runtime, "default", "TensorRT Runtime"); DEFINE_int32(seconds, 5, "Approximate number of seconds for the timing loop"); DEFINE_int32(contexts, 1, "Number of Execution Contexts"); DEFINE_int32(buffers, 0, "Number of Buffers (default: 2x contexts)"); DEFINE_int32(cudathreads, 1, "Number Cuda Launcher Threads"); DEFINE_int32(respthreads, 1, "Number Response Sync Threads"); DEFINE_int32(replicas, 1, "Number of Replicas of the Model to load"); DEFINE_int32(batch_size, 0, "Overrides the max batch_size of the provided engine"); int main(int argc, char* argv[]) { FLAGS_alsologtostderr = 1; // Log to console ::google::InitGoogleLogging("TensorRT Inference"); ::google::ParseCommandLineFlags(&argc, &argv, true); MPI_CHECK(MPI_Init(&argc, &argv)); auto contexts = FLAGS_contexts; auto buffers = FLAGS_buffers ? FLAGS_buffers : 2 * FLAGS_contexts; auto resources = std::make_shared(contexts, buffers); resources->RegisterThreadPool("pre", std::make_unique(1)); resources->RegisterThreadPool("cuda", std::make_unique(1)); resources->RegisterThreadPool("post", std::make_unique(3)); //, FLAGS_cudathreads, FLAGS_respthreads); std::shared_ptr runtime; if(FLAGS_runtime == "default") { runtime = std::make_shared(); } else if(FLAGS_runtime == "unified") { runtime = std::make_shared(); } else { LOG(FATAL) << "Invalid TensorRT Runtime"; } std::vector> models; models.push_back(runtime->DeserializeEngine(FLAGS_engine)); resources->RegisterModel("0", models.back()); resources->AllocateResources(); auto batch_size = FLAGS_batch_size ? 
FLAGS_batch_size : models.back()->GetMaxBatchSize(); for(int i = 1; i < FLAGS_replicas; i++) { models.push_back(runtime->DeserializeEngine(FLAGS_engine)); resources->RegisterModel(ModelName(i), models.back()); } { InferBench benchmark(resources); benchmark.Run(models, batch_size, 0.1); // if testing mps - sync all processes before executing timed loop MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD)); auto results = benchmark.Run(models, batch_size, FLAGS_seconds); MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD)); // todo: perform an mpi_allreduce to collect the per process timings // for a simplified report MPI_CHECK(MPI_Finalize()); using namespace trtlab::TensorRT; LOG(INFO) << "Inference Results: " << (*results)[kBatchesComputed] << " batches computed in " << (*results)[kWalltime] << " seconds on " << (*results)[kMaxExecConcurrency] << " compute streams using batch_size: " << (*results)[kBatchSize] << "; inf/sec: " << (*results)[kInferencesPerSecond] << "; batches/sec: " << (*results)[kBatchesPerSecond] << "; execution time per batch: " << (*results)[kExecutionTimePerBatch]; } return 0; } ================================================ FILE: examples/00_TensorRT/inference.cc ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include "tensorrt/laboratory/core/thread_pool.h" #include "tensorrt/laboratory/inference_manager.h" #include "tensorrt/laboratory/runtime.h" #ifdef PLAYGROUND_USE_MPI #include "mpi.h" #define MPI_CHECK(mpicall) mpicall #else #define MPI_CHECK(mpicall) #endif using trtlab::ThreadPool; using trtlab::TensorRT::CustomRuntime; using trtlab::TensorRT::InferenceManager; using trtlab::TensorRT::ManagedAllocator; using trtlab::TensorRT::Runtime; using trtlab::TensorRT::StandardAllocator; static int g_Concurrency = 0; static std::string ModelName(int model_id) { std::ostringstream stream; stream << model_id; return stream.str(); } class InferenceResources : public InferenceManager { public: InferenceResources(int max_executions, int max_buffers, size_t nCuda, size_t nResp) : InferenceManager(max_executions, max_buffers), m_CudaThreadPool(std::make_unique(nCuda)), m_ResponseThreadPool(std::make_unique(nResp)) { } ~InferenceResources() override {} std::unique_ptr& GetCudaThreadPool() { return m_CudaThreadPool; } std::unique_ptr& GetResponseThreadPool() { return m_ResponseThreadPool; } private: std::unique_ptr m_CudaThreadPool; std::unique_ptr m_ResponseThreadPool; }; class Inference final { public: Inference(std::shared_ptr resources) : m_Resources(resources) {} void Run(float seconds, bool warmup, int replicas, uint32_t requested_batch_size) { int replica = 0; uint64_t inf_count = 0; auto start = std::chrono::steady_clock::now(); auto elapsed = [start]() -> float { return std::chrono::duration(std::chrono::steady_clock::now() - start).count(); }; auto model = GetResources()->GetModel(ModelName(replica++)); auto batch_size = requested_batch_size ? requested_batch_size : model->GetMaxBatchSize(); if(batch_size > model->GetMaxBatchSize()) { LOG(FATAL) << "Requested batch_size greater than allowed by the compiled TensorRT Engine"; } // Inference Loop - Main thread copies, cuda thread launches, response thread completes if(!warmup) { LOG(INFO) << "-- Inference: Running for ~" << (int)seconds << " seconds with batch_size " << batch_size << " --"; } std::vector> futures; while(elapsed() < seconds && ++inf_count) { if(replica >= replicas) replica = 0; // This thread only async copies buffers H2D auto model = GetResources()->GetModel(ModelName(replica++)); auto buffers = GetResources()->GetBuffers(); // <=== Limited Resource; May Block !!! auto bindings = buffers->CreateBindings(model); auto promise = std::make_shared>(); futures.push_back(promise->get_future()); bindings->SetBatchSize(batch_size); bindings->CopyToDevice(bindings->InputBindings()); GetResources()->GetCudaThreadPool()->enqueue([this, bindings, promise]() mutable { // This thread enqueues two async kernels: // 1) TensorRT execution // 2) D2H of output tensors auto trt = GetResources()->GetExecutionContext( bindings->GetModel()); // <=== Limited Resource; May Block !!! 
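// Note: Infer() and CopyFromDevice() below only enqueue asynchronous work on the // execution context's stream; this lambda returns long before the GPU finishes. // Completion is observed on the response thread via trt->Synchronize() and // bindings->Synchronize(), which then release both limited resources.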
trt->Infer(bindings); bindings->CopyFromDevice(bindings->OutputBindings()); GetResources()->GetResponseThreadPool()->enqueue( [bindings, trt, promise]() mutable { // This thread waits on the completion of the async compute and the async // copy trt->Synchronize(); trt.reset(); // Finished with the Execution Context - Release it to // competing threads bindings->Synchronize(); bindings.reset(); // Finished with Buffers - Release it to competing threads promise->set_value(); }); }); } for(const auto& f : futures) { f.wait(); } /* // Join worker threads if (!warmup) GetResources()->GetCudaThreadPool().reset(); if (!warmup) GetResources()->GetResponseThreadPool().reset(); */ // End timing and report auto total_time = std::chrono::duration(elapsed()).count(); auto inferences = inf_count * batch_size; if(!warmup) LOG(INFO) << "Inference Results: " << inf_count << "; batches in " << total_time << " seconds" << "; sec/batch/stream: " << total_time / (inf_count / g_Concurrency) << "; batches/sec: " << inf_count / total_time << "; inf/sec: " << inferences / total_time; } protected: inline std::shared_ptr GetResources() { return m_Resources; } private: std::shared_ptr m_Resources; }; static bool ValidateEngine(const char* flagname, const std::string& value) { struct stat buffer; return (stat(value.c_str(), &buffer) == 0); } DEFINE_string(engine, "/path/to/tensorrt.engine", "TensorRT serialized engine"); DEFINE_validator(engine, &ValidateEngine); DEFINE_string(runtime, "default", "TensorRT Runtime"); DEFINE_int32(seconds, 5, "Approximate number of seconds for the timing loop"); DEFINE_int32(contexts, 1, "Number of Execution Contexts"); DEFINE_int32(buffers, 0, "Number of Buffers (default: 2x contexts)"); DEFINE_int32(cudathreads, 1, "Number Cuda Launcher Threads"); DEFINE_int32(respthreads, 1, "Number Response Sync Threads"); DEFINE_int32(replicas, 1, "Number of Replicas of the Model to load"); DEFINE_int32(batch_size, 0, "Overrides the max batch_size of the provided engine"); int main(int argc, char* argv[]) { FLAGS_alsologtostderr = 1; // Log to console ::google::InitGoogleLogging("TensorRT Inference"); ::google::ParseCommandLineFlags(&argc, &argv, true); MPI_CHECK(MPI_Init(&argc, &argv)); auto contexts = g_Concurrency = FLAGS_contexts; auto buffers = FLAGS_buffers ? FLAGS_buffers : 2 * FLAGS_contexts; auto resources = std::make_shared(contexts, buffers, FLAGS_cudathreads, FLAGS_respthreads); std::shared_ptr runtime; if(FLAGS_runtime == "default") { runtime = std::make_shared>(); } else if(FLAGS_runtime == "unified") { runtime = std::make_shared>(); } else { LOG(FATAL) << "Invalid TensorRT Runtime"; } resources->RegisterModel("0", runtime->DeserializeEngine(FLAGS_engine)); resources->AllocateResources(); for(int i = 1; i < FLAGS_replicas; i++) { resources->RegisterModel(ModelName(i), runtime->DeserializeEngine(FLAGS_engine)); } Inference inference(resources); inference.Run(0.1, true, 1, 0); // warmup // if testing mps - sync all processes before executing timed loop MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD)); inference.Run(FLAGS_seconds, false, FLAGS_replicas, FLAGS_batch_size); MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD)); // todo: perform an mpi_allreduce to collect the per process timings // for a simplified report MPI_CHECK(MPI_Finalize()); return 0; } ================================================ FILE: examples/01_Basic_GRPC/CMakeLists.txt ================================================ # Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of NVIDIA CORPORATION nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add_executable(echo-grpc.x src/server.cpp) target_link_libraries(echo-grpc.x nvrpc echo-protos gflags ) add_executable(echo-client.x src/client.cpp) target_link_libraries(echo-client.x nvrpc echo-protos gflags ) add_executable(async-echo-client.x src/async_client.cc) target_link_libraries(async-echo-client.x nvrpc nvrpc-client echo-protos gflags ) ================================================ FILE: examples/01_Basic_GRPC/README.md ================================================ Simple service to test and stress the core service and request logic. The [`server.cpp`](examples/01_Basic_GRPC/src/server.cpp) is very well documented and should be used as a reference for the gRPC interface provided by the library. ================================================ FILE: examples/01_Basic_GRPC/src/async_client.cc ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include "nvrpc/client/client_unary.h" #include "nvrpc/client/executor.h" #include "echo.grpc.pb.h" using grpc::Channel; using grpc::ClientContext; using grpc::Status; using simple::Inference; using simple::Input; using simple::Output; using nvrpc::client::ClientUnary; using nvrpc::client::Executor; DEFINE_int32(count, 100, "number of grpc messages to send"); DEFINE_int32(thread_count, 1, "Size of thread pool"); int main(int argc, char** argv) { // Instantiate the client. It requires a channel, out of which the actual RPCs // are created. This channel models a connection to an endpoint (in this case, // localhost at port 50051). We indicate that the channel isn't authenticated // (use of InsecureChannelCredentials()). FLAGS_alsologtostderr = 1; // It will dump to console ::google::ParseCommandLineFlags(&argc, &argv, true); auto executor = std::make_shared(FLAGS_thread_count); auto channel = grpc::CreateChannel("localhost:50051", grpc::InsecureChannelCredentials()); auto stub = Inference::NewStub(channel); auto infer_prepare_fn = [&stub](::grpc::ClientContext * context, const ::simple::Input& request, ::grpc::CompletionQueue* cq) -> auto { return std::move(stub->PrepareAsyncCompute(context, request, cq)); }; auto runner = std::make_unique>(infer_prepare_fn, executor); auto start = std::chrono::steady_clock::now(); auto elapsed = [start]() -> float { return std::chrono::duration(std::chrono::steady_clock::now() - start).count(); }; for(int i = 0; i < FLAGS_count; i++) { Input input; input.set_batch_id(i); runner->Enqueue(std::move(input), [i](Input& input, Output& output, ::grpc::Status& status) -> bool { CHECK(output.batch_id() == i); LOG_FIRST_N(INFO, 20) << "Check: " << i; return (bool)(output.batch_id() == i); }); } std::cout << FLAGS_count << " queued in " << elapsed() << "seconds" << std::endl; executor->ShutdownAndJoin(); std::cout << FLAGS_count << " completed in " << elapsed() << "seconds" << std::endl; return 0; } ================================================ FILE: examples/01_Basic_GRPC/src/client.cpp ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include "echo.grpc.pb.h" using grpc::Channel; using grpc::ClientContext; using grpc::Status; using simple::Input; using simple::Output; using simple::Inference; class SimpleClient { public: SimpleClient(std::shared_ptr channel) : stub_(Inference::NewStub(channel)) {} // Assembles the client's payload, sends it and presents the response back // from the server. int Compute(const int batch_id) { // Data we are sending to the server. Input request; request.set_batch_id(batch_id); // Container for the data we expect from the server. Output reply; // Context for the client. It could be used to convey extra information to // the server and/or tweak certain RPC behaviors. ClientContext context; // The actual RPC. Status status = stub_->Compute(&context, request, &reply); // Act upon its status. if (status.ok()) { return reply.batch_id(); } else { std::cout << status.error_code() << ": " << status.error_message() << std::endl; return -1; } } private: std::unique_ptr stub_; }; DEFINE_int32(count, 100, "number of grpc messages to send"); int main(int argc, char** argv) { // Instantiate the client. It requires a channel, out of which the actual RPCs // are created. This channel models a connection to an endpoint (in this case, // localhost at port 50051). We indicate that the channel isn't authenticated // (use of InsecureChannelCredentials()). FLAGS_alsologtostderr = 1; // It will dump to console ::google::ParseCommandLineFlags(&argc, &argv, true); SimpleClient client(grpc::CreateChannel( "localhost:50051", grpc::InsecureChannelCredentials())); auto start = std::chrono::steady_clock::now(); for(int i=0; i(end - start).count(); std::cout << FLAGS_count << " requests in " << elapsed << "seconds" << std::endl; return 0; } ================================================ FILE: examples/01_Basic_GRPC/src/server.cpp ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include "nvrpc/server.h" #include "nvrpc/service.h" #include "nvrpc/executor.h" #include "tensorrt/laboratory/core/pool.h" #include "tensorrt/laboratory/core/resources.h" #include "tensorrt/laboratory/core/thread_pool.h" #include "echo.pb.h" #include "echo.grpc.pb.h" using nvrpc::AsyncService; using nvrpc::AsyncRPC; using nvrpc::Context; using nvrpc::Executor; using nvrpc::Server; using trtlab::Resources; using trtlab::ThreadPool; // CLI Options DEFINE_int32(thread_count, 1, "Size of thread pool"); /** * Embedding a copy of the Protobuf specification for the gRPC service. * * Package Name: simple * Service Name: Inference * RPC Name: Compute * * Incoming Message: Input * Outgoing Message: Output ** syntax = "proto3"; package simple; service Inference { rpc Compute (Input) returns (Output) {} } message Input { uint64 batch_id = 1; } message Output { uint64 batch_id = 1; } */ // Define the resources your RPC will need to execute // ================================================== // In this case, all simple::Inference::Compute RPCs share a threadpool in which they will // queue up some work. This essentially means, after the message has been received and // processed, the actual work for the RPC is pushed to a worker pool outside the scope of // the transaction processing system (TPS). This is essentially async computing: we have // decoupled the transaction from the workers executing the implementation. The TPS can // continue to queue work, while the workers process the load. struct SimpleResources : public Resources { SimpleResources(int numThreadsInPool=3) : m_ThreadPool(numThreadsInPool) { LOG(INFO) << "Server ThreadCount: " << numThreadsInPool; } ThreadPool& AcquireThreadPool() { return m_ThreadPool; } private: ThreadPool m_ThreadPool; }; // Contexts hold the state and provide the definition of the work to be performed by the RPC. // This is where you define what gets executed for a given RPC. // Incoming Message = simple::Input (RequestType) // Outgoing Message = simple::Output (ResponseType) class SimpleContext final : public Context<simple::Input, simple::Output, SimpleResources> { void ExecuteRPC(RequestType &input, ResponseType &output) final override { // We could do work here, but we'd block the TPS, i.e. the threads pulling messages // off the incoming receive queue. Very quick responses are best done here; however, // longer-running workloads should be offloaded so the TPS can avoid being blocked. GetResources()->AcquireThreadPool().enqueue([this, &input, &output]{ // Now running on a worker thread of the ThreadPool defined in SimpleResources. // Here we are just echoing back the incoming batch_id; however, in later // examples, we'll show how to run an async cuda pipeline.
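// Note: input and output are captured by reference; both messages live in the // Context and remain valid until FinishResponse() completes the transaction, // so touching them from this worker thread is safe.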
LOG_FIRST_N(INFO, 20) << "Tag = " << Tag() << " Thread = " << std::this_thread::get_id(); output.set_batch_id(input.batch_id()); this->FinishResponse(); }); // The TPS thread is now free to continue processing messages - async ftw! } }; int main(int argc, char *argv[]) { FLAGS_alsologtostderr = 1; // Log to console ::google::InitGoogleLogging("simpleServer"); ::google::ParseCommandLineFlags(&argc, &argv, true); // A server will bind an IP:PORT to listen on Server server("0.0.0.0:50051"); // A server can host multiple services LOG(INFO) << "Register Service (simple::Inference) with Server"; auto simpleInference = server.RegisterAsyncService<simple::Inference>(); // An RPC has two components that need to be specified when registering with the service: // 1) Type of Execution Context (SimpleContext). The execution context defines the behavior // of the RPC, i.e. it contains the control logic for the execution of the RPC. // 2) The Request function (RequestCompute) which was generated by gRPC when compiling the // protobuf which defined the service. This function is responsible for queuing the // RPC's execution context on the server's completion queue. LOG(INFO) << "Register RPC (simple::Inference::Compute) with Service (simple::Inference)"; auto rpcCompute = simpleInference->RegisterRPC<SimpleContext>( &simple::Inference::AsyncService::RequestCompute ); LOG(INFO) << "Initializing Resources for RPC (simple::Inference::Compute)"; auto rpcResources = std::make_shared<SimpleResources>(FLAGS_thread_count); // Create Executors - Executors provide the messaging processing resources for the RPCs // Multiple Executors can be registered with a Server. The executor is responsible // for pulling incoming messages off the receive queue and executing the associated // context. By default, an executor only uses a single thread. A typical use case is // an Executor that executes a context which immediately pushes the work to a thread pool. // However, for very low-latency messaging, you might want to use a multi-threaded // Executor and a Blocking Context - meaning the Context performs the entire RPC function // on the Executor's thread. LOG(INFO) << "Creating Executor"; auto executor = server.RegisterExecutor(new Executor(1)); // You can register RPC execution contexts from any registered RPC on any executor. // The power of that will become clear in later examples. For now, we will register // 10 instances of the simple::Inference::Compute RPC's SimpleContext execution context // with the Executor. LOG(INFO) << "Creating Execution Contexts for RPC (simple::Inference::Compute) with Executor"; executor->RegisterContexts(rpcCompute, rpcResources, 10); LOG(INFO) << "Running Server"; server.Run(std::chrono::milliseconds(2000), []{ // This is a timeout loop executed every 2 seconds // Run() with no arguments will run an empty timeout loop every 5 seconds. // RunAsync() will return immediately; it's your responsibility to ensure the // server doesn't go out of scope or a Shutdown will be triggered on your services. }); } ================================================ FILE: examples/02_TensorRT_GRPC/CMakeLists.txt ================================================ # Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

find_package(prometheus-cpp CONFIG REQUIRED)
if(prometheus-cpp_FOUND)
  message(STATUS "Prometheus Metrics Enabled")
endif(prometheus-cpp_FOUND)

add_executable(inference-grpc.x
  src/metrics.cc
  src/server.cc
)

target_include_directories(inference-grpc.x PUBLIC $ )

target_link_libraries(inference-grpc.x
  trtlab::nvrpc
  trtlab::tensorrt
  prometheus-cpp::prometheus-cpp
  demo-protos
  gflags
)

add_executable(client-sync.x src/sync-client.cc)
target_link_libraries(client-sync.x nvrpc demo-protos gflags)

add_executable(client-async.x src/async-client.cc)
target_link_libraries(client-async.x nvrpc demo-protos gflags)

add_executable(siege.x src/siege.cc)
target_link_libraries(siege.x nvrpc demo-protos gflags)

================================================
FILE: examples/02_TensorRT_GRPC/README.md
================================================
# TensorRT GRPC Example

This example extends the [TensorRT](examples/00_TensorRT) compute loop into an async gRPC
service similar to [example 01_gRPC](examples/01_GRPC). There are three takeaways from this
example:

1. The TensorRT compute pipeline is implemented as the `ExecuteRPC` virtual function of the `Context`.
2. An external datasource is used to override the input bindings.
3. Custom [Prometheus](https://prometheus.io) metrics are recorded for inference compute and
   request durations, load ratio, and a GPU power gauge.

## Quickstart

```
cd /work/build/examples/02_TensorRT_GRPC
./inference-grpc.x --contexts=8 --engine=/work/models/ResNet-50-b1-int8.engine --port 50051 &
./siege.x --port=50051 --rate=2500 # ctrl+c to cancel client
telegraf -test -config /work/examples/91_Prometheus/scrape.conf
```

## Explore

Fun things to try:

* Evaluate the performance of the model using `inference.x` in [examples/00_TensorRT](examples/00_TensorRT)
* Try running `siege.x` below, at, and above the benchmarked rate and watch the metrics via `telegraf`.
* Deploy on Kubernetes, collect metrics via Prometheus and visualize using Grafana; [examples/90_Kubernetes](examples/90_Kubernetes).

## Server/Service

`inference-grpc.x` CLI options:

* `--engine` - the compiled TensorRT plan/engine
* `--contexts` - the maximum number of concurrent evaluations of the engine.
* `--port` - the port on which requests are received (default: 50051)
* `--metrics` - the port on which to expose metrics to be scraped (default: 50078)

## Clients

Three clients are available:

* `client-sync.x` - sends a blocking inference request to the service and waits for the
  response. Only 1 request is ever in-flight at a given time.

* `client-async.x` - the async client is capable of issuing multiple in-flight requests.
  Note: the load-balancer is limited to 1000 outstanding requests per client before circuit-
  breaking. Running more than 1000 requests will trigger 503s if targeting the envoy load-
  balancer. The client has no backoff and will try to send the full complement of requested
  inference requests. `siege.x` is the better async client.

* `siege.x` - constant rate (`--rate`) async engine that is hard-coded to have no more than
  950 outstanding in-flight requests. A warning is given client-side when the outstanding
  request count tops out, meaning the rate is limited by the server-side compute.

TODO:

* Add more varied test clients akin to [Netflix's Chaos Monkeys](https://github.com/Netflix/chaosmonkey), but for gRPC client behavior.
* Random rates, random pulses, canceled messages, messages with unreasonable timeouts, etc.

## Metrics

YAIS metrics are gathered and exposed via the
[prometheus-cpp](https://github.com/jupp0r/prometheus-cpp) client library. In this example,
we expose four custom [metrics](https://prometheus.io/docs/concepts/metric_types/):
2 Summaries, 1 Histogram and 1 Gauge.

* `compute_duration` and `request_duration` are summaries recorded with the model name as a
  component of the metric. This is useful for evaluating how a given model is performing,
  but it is not a good metric to aggregate across multiple services.

* `load_ratio` is a histogram of `request_duration / compute_duration`. Ideally, this
  unitless value is just over 1.0. Values higher than 1.0 are indicative of delays in the
  compute of a given request. Sources of delay include overloaded queues and/or starvation
  of resources. Histograms can be aggregated across services, which makes this metric a good
  candidate for triggering auto-scaling.

* `gpu_power` is a simple gauge that periodically reports the instantaneous power being
  consumed by the device. As the load increases on the service, the power should increase
  proportionally, until the power is capped either by device limits or compute resources.
  When power capped, the `load_ratio` will begin to increase under further increases in
  traffic.

### Acquiring Metrics

Prometheus metrics are generally scraped by a Prometheus service. When using Kubernetes to
deploy services, the [prometheus-operator](https://github.com/coreos/prometheus-operator)
provides a [`ServiceMonitor`](https://github.com/coreos/prometheus-operator#customresourcedefinitions)
which allows you to define a custom scraping configuration per service. See the
[Kubernetes example](examples/90_Kubernetes) for more details.

While testing, you can use the [`telegraf`](https://github.com/influxdata/telegraf)
application to scrape local services.
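The metrics above are registered in [`src/server.cc`](src/server.cc) using the prometheus-cpp
builder API. A minimal sketch of that pattern (a fragment only; the exposer/registry wiring
here is simplified relative to the `Metrics` singleton in `src/metrics.h`, and `compute_ms`
is a placeholder for a measured duration):

```
#include <prometheus/exposer.h>
#include <prometheus/registry.h>
#include <prometheus/summary.h>

// Expose a registry on the --metrics port
auto registry = std::make_shared<prometheus::Registry>();
prometheus::Exposer exposer("0.0.0.0:50078");
exposer.RegisterCollectable(registry);

// One Summary family; one time-series per label set
auto& family = prometheus::BuildSummary()
                   .Name("yais_inference_compute_duration_ms")
                   .Register(*registry);
auto& compute_summary = family.Add(
    {{"model", "flowers"}},
    prometheus::Summary::Quantiles{{0.5, 0.05}, {0.9, 0.01}, {0.99, 0.001}});

// After each inference completes:
compute_summary.Observe(compute_ms); // milliseconds
```

With the service exposing its registry, `telegraf` can scrape it directly: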
``` # start service telegraf -test -config /work/examples/91_Prometheus/scrape.conf ``` Here is some sample output (line breaks added for readability): ``` > yais_inference_compute_duration_ms,host=dgx,model=flowers,url=http://localhost:50078/metrics count=1000,sum=2554.070996 1530985302000000000 > yais_inference_compute_duration_ms_quantile,host=dgx,model=flowers,quantile=0.500000,url=http://localhost:50078/metrics value=2.526903 1530985302000000000 > yais_inference_compute_duration_ms_quantile,host=dgx,model=flowers,quantile=0.900000,url=http://localhost:50078/metrics value=2.625447 1530985302000000000 > yais_inference_compute_duration_ms_quantile,host=dgx,model=flowers,quantile=0.990000,url=http://localhost:50078/metrics value=2.855728 1530985302000000000 > yais_inference_request_duration_ms,host=dgx,model=flowers,url=http://localhost:50078/metrics count=1000,sum=243547.558097 1530985302000000000 > yais_inference_request_duration_ms_quantile,host=dgx,model=flowers,quantile=0.500000,url=http://localhost:50078/metrics value=253.216653 1530985302000000000 > yais_inference_request_duration_ms_quantile,host=dgx,model=flowers,quantile=0.900000,url=http://localhost:50078/metrics value=256.715759 1530985302000000000 > yais_inference_request_duration_ms_quantile,host=dgx,model=flowers,quantile=0.990000,url=http://localhost:50078/metrics value=275.407232 1530985302000000000 > yais_inference_load_ratio,host=dgx,url=http://localhost:50078/metrics +Inf=1000,1.25=1,1.5=1,10=9,100=253,2=1,count=1000,sum=95879.013208 1530985302000000000 > yais_gpus_power_usage,gpu=0,host=dgx,url=http://localhost:50078/metrics gauge=52.821 1530985302000000000 > yais_executor_queue_depth,host=dgx,url=http://localhost:50078/metrics gauge=0 1530985302000000000 ``` ### Best Practices For a good description of using histograms vs. summaries to collect meaningful metrics see: https://prometheus.io/docs/practices/histograms/ Two rules of thumb: - If you need to aggregate, choose histograms. - Otherwise, choose a histogram if you have an idea of the range and distribution of values that will be observed. Choose a summary if you need an accurate quantile, no matter what the range and distribution of the values is. ================================================ FILE: examples/02_TensorRT_GRPC/src/async-client.cc ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Original Copyright proivded below. * This work extends the original gRPC client examples to work with the * implemented server. * * Copyright 2015 gRPC authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ #include #include #include #include #include #include #include #include #include #include "inference.grpc.pb.h" using grpc::Channel; using grpc::ClientAsyncResponseReader; using grpc::ClientContext; using grpc::CompletionQueue; using grpc::Status; using ssd::BatchInput; using ssd::BatchPredictions; using ssd::Inference; class GreeterClient { public: explicit GreeterClient(std::shared_ptr channel) : stub_(Inference::NewStub(channel)) {} // Assembles the client's payload and sends it to the server. void SayHello(const size_t batch_id, const int batch_size) { // Data we are sending to the server. BatchInput request; request.set_batch_id(batch_id); request.set_batch_size(batch_size); // Call object to store rpc data AsyncClientCall* call = new AsyncClientCall; // stub_->PrepareAsyncSayHello() creates an RPC object, returning // an instance to store in "call" but does not actually start the RPC // Because we are using the asynchronous API, we need to hold on to // the "call" instance in order to get updates on the ongoing RPC. call->response_reader = stub_->PrepareAsyncCompute(&call->context, request, &cq_); // StartCall initiates the RPC call call->response_reader->StartCall(); // Request that, upon completion of the RPC, "reply" be updated with the // server's response; "status" with the indication of whether the operation // was successful. Tag the request with the memory address of the call object. call->response_reader->Finish(&call->reply, &call->status, (void*)call); } // Loop while listening for completed responses. // Prints out the response from the server. void AsyncCompleteRpc() { void* got_tag; bool ok = false; // Block until the next result is available in the completion queue "cq". while(cq_.Next(&got_tag, &ok)) { // The tag in this example is the memory location of the call object AsyncClientCall* call = static_cast(got_tag); // Verify that the request was completed successfully. Note that "ok" // corresponds solely to the request for updates introduced by Finish(). GPR_ASSERT(ok); if(call->status.ok()) { // std::cout << "Greeter received: " << call->reply.batch_id() << std::endl; } else { std::cout << "RPC failed" << std::endl; } // Once we're complete, deallocate the call object. 
delete call; } } void Shutdown() { cq_.Shutdown(); } private: // struct for keeping state and data information struct AsyncClientCall { // Container for the data we expect from the server. BatchPredictions reply; // Context for the client. It could be used to convey extra information to // the server and/or tweak certain RPC behaviors. ClientContext context; // Storage for the status of the RPC upon completion. Status status; std::unique_ptr> response_reader; }; // Out of the passed in Channel comes the stub, stored here, our view of the // server's exposed services. std::unique_ptr stub_; // The producer-consumer queue we use to communicate asynchronously with the // gRPC runtime. CompletionQueue cq_; }; DEFINE_int32(count, 500, "number of grpc messages to send"); DEFINE_int32(batch_size, 1, "batch_size"); DEFINE_int32(port, 50051, "server_port"); int main(int argc, char** argv) { FLAGS_alsologtostderr = 1; // It will dump to console ::google::ParseCommandLineFlags(&argc, &argv, true); // Instantiate the client. It requires a channel, out of which the actual RPCs // are created. This channel models a connection to an endpoint (in this case, // localhost at port 50051). We indicate that the channel isn't authenticated // (use of InsecureChannelCredentials()). std::ostringstream ip_port; ip_port << "localhost:" << FLAGS_port; GreeterClient greeter(grpc::CreateChannel(ip_port.str(), grpc::InsecureChannelCredentials())); // Spawn reader thread that loops indefinitely std::thread thread_ = std::thread(&GreeterClient::AsyncCompleteRpc, &greeter); auto start = std::chrono::steady_clock::now(); for(size_t i = 0; i < FLAGS_count; i++) { greeter.SayHello(i, FLAGS_batch_size); // The actual RPC call! } greeter.Shutdown(); thread_.join(); // blocks forever auto end = std::chrono::steady_clock::now(); float elapsed = std::chrono::duration(end - start).count(); std::cout << FLAGS_count << " requests in " << elapsed << "seconds; inf/sec: " << FLAGS_count * FLAGS_batch_size / elapsed << std::endl; return 0; } ================================================ FILE: examples/02_TensorRT_GRPC/src/metrics.cc ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "metrics.h" #include #include namespace trtlab { void Metrics::Initialize(uint32_t port) { auto singleton = GetSingleton(); if(singleton->m_Exposer) { LOG(WARNING) << "Metrics already initialized. This call is ignored"; return; } std::ostringstream stream; stream << "0.0.0.0:" << port; singleton->m_Exposer = std::make_unique(stream.str()); singleton->m_Exposer->RegisterCollectable(singleton->m_Registry); } auto Metrics::GetRegistry() -> Registry& { auto singleton = Metrics::GetSingleton(); return *(singleton->m_Registry); } Metrics* Metrics::GetSingleton() { static Metrics singleton; return &singleton; } Metrics::Metrics() : m_Registry(std::make_shared()) {} Metrics::~Metrics() {} } // namespace trtlab ================================================ FILE: examples/02_TensorRT_GRPC/src/metrics.h ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #pragma once #include #include using prometheus::Exposer; using prometheus::Registry; namespace trtlab { class Metrics { public: static void Initialize(uint32_t port); static auto GetRegistry() -> Registry&; protected: Metrics(); virtual ~Metrics(); static Metrics* GetSingleton(); private: std::unique_ptr m_Exposer; std::shared_ptr m_Registry; }; } // namespace trtlab ================================================ FILE: examples/02_TensorRT_GRPC/src/server.cc ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. 
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "nvml.h"

#include #include #include #include #include #include #include #include

#include "tensorrt/laboratory/core/affinity.h"
#include "tensorrt/laboratory/core/memory/allocator.h"
#include "tensorrt/laboratory/cuda/device_info.h"
#include "tensorrt/laboratory/cuda/memory/cuda_pinned_host.h"
#include "tensorrt/laboratory/inference_manager.h"
#include "tensorrt/laboratory/runtime.h"

#include "nvrpc/context.h"
#include "nvrpc/executor.h"
#include "nvrpc/server.h"
#include "nvrpc/service.h"

#include "metrics.h"

using nvrpc::AsyncRPC;
using nvrpc::AsyncService;
using nvrpc::Context;
using nvrpc::Executor;
using nvrpc::Server;
using trtlab::Affinity;
using trtlab::Allocator;
using trtlab::CudaPinnedHostMemory;
using trtlab::DeviceInfo;
using trtlab::Metrics;
using trtlab::ThreadPool;
using trtlab::TensorRT::InferenceManager;
using trtlab::TensorRT::ManagedRuntime;
using trtlab::TensorRT::Model;
using trtlab::TensorRT::Runtime;
using trtlab::TensorRT::StandardRuntime;

// Flowers Protos
#include "inference.grpc.pb.h"
#include "inference.pb.h"

using ssd::BatchInput;
using ssd::BatchPredictions;
using ssd::Inference;

/*
 * Prometheus Metrics
 *
 * It is important to collect measurements to find bottlenecks and performance issues,
 * and to trigger auto-scaling.
 */
static auto& registry = Metrics::GetRegistry();

// Summaries - Request and Compute duration on a per service basis
static auto& inf_compute =
    prometheus::BuildSummary().Name("yais_inference_compute_duration_ms").Register(registry);
static auto& inf_request =
    prometheus::BuildSummary().Name("yais_inference_request_duration_ms").Register(registry);
static const auto& quantiles =
    prometheus::Summary::Quantiles{{0.5, 0.05}, {0.90, 0.01}, {0.99, 0.001}};

// Histogram - Load Ratio = Request/Compute duration - should be just above one for a service
//             that can keep up with its current load. This metric provides more
//             detailed information on the impact of the queue depth because it accounts
//             for request time.
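// The bucket edges below are unitless load ratios: a sample near 1.25 means a request
// spent little time queued relative to its compute, while the 10.0 and 100.0 buckets
// capture requests whose wall time was dominated by queuing (an overloaded or starved
// service).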
static const std::vector<double> buckets = {1.25, 1.50, 2.0, 10.0, 100.0}; // unitless
static auto& inf_load_ratio_fam =
    prometheus::BuildHistogram().Name("yais_inference_load_ratio").Register(registry);
static auto& inf_load_ratio = inf_load_ratio_fam.Add({}, buckets);

// Gauge - Periodically measure and report GPU power utilization. As the load increases
//         on the service, the power should increase proportionally, until the power is
//         capped either by device limits or compute resources. At this level, the
//         inf_load_ratio will begin to increase under further increases in traffic.
static auto& power_gauge_fam =
    prometheus::BuildGauge().Name("yais_gpus_power_usage").Register(registry);
static auto& power_gauge = power_gauge_fam.Add({{"gpu", "0"}});

/*
 * External Data Source
 *
 * Attaches to a System V shared memory segment owned by an external resource.
 * Example: the results of an image decode service could use this mechanism to transfer
 * large tensors to an inference service by simply passing an offset.
 */
float* GetSharedMemory(const std::string& address);

/*
 * YAIS Resources - TensorRT InferenceManager + ThreadPools + External Datasource
 */
class FlowersResources : public InferenceManager
{
  public:
    explicit FlowersResources(int max_executions, int max_buffers, int nCuda, int nResp,
                              float* sysv_data)
        : InferenceManager(max_executions, max_buffers), m_CudaThreadPool(nCuda),
          m_ResponseThreadPool(nResp), m_SharedMemory(sysv_data)
    {
    }

    ThreadPool& GetCudaThreadPool() { return m_CudaThreadPool; }
    ThreadPool& GetResponseThreadPool() { return m_ResponseThreadPool; }

    float* GetSysvOffset(size_t offset_in_bytes)
    {
        return &m_SharedMemory[offset_in_bytes / sizeof(float)];
    }

  private:
    ThreadPool m_CudaThreadPool;
    ThreadPool m_ResponseThreadPool;
    float* m_SharedMemory;
};

/*
 * nvRPC Context - Defines the logic of the RPC.
 */
class FlowersContext final : public Context<BatchInput, BatchPredictions, FlowersResources>
{
    void ExecuteRPC(RequestType& input, ResponseType& output) final override
    {
        // Executing on an Executor thread - we don't want to block message handling, so we offload
        GetResources()->GetCudaThreadPool().enqueue([this, &input, &output]() {
            // Executed on a thread from CudaThreadPool
            auto model = GetResources()->GetModel("flowers");
            auto buffers = GetResources()->GetBuffers(); // <=== Limited Resource; May Block !!!
            auto bindings = buffers->CreateBindings(model);
            bindings->SetBatchSize(input.batch_size());
            bindings->SetHostAddress(0, GetResources()->GetSysvOffset(input.sysv_offset()));
            bindings->CopyToDevice(bindings->InputBindings());
            auto ctx = GetResources()->GetExecutionContext(model); // <=== Limited Resource; May Block !!!
            ctx->Infer(bindings);
            bindings->CopyFromDevice(bindings->OutputBindings());
            // All Async CUDA work has been queued - this thread's work is done.
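            // Handing off to a second pool decouples kernel launching from completion:
            // this CudaThreadPool thread can immediately begin enqueuing the next
            // request's H2D/compute/D2H work, while a ResponseThreadPool thread blocks
            // on the synchronization below.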
GetResources()->GetResponseThreadPool().enqueue([this, &input, &output, model, bindings, ctx]() mutable { // Executed on a thread from ResponseThreadPool auto compute_time = ctx->Synchronize(); ctx.reset(); // Finished with the Execution Context - Release it to competing // threads bindings->Synchronize(); // Blocks on H2D, Compute, D2H Pipeline WriteBatchPredictions(input, output, (float*)bindings->HostAddress(1)); bindings.reset(); // Finished with Buffers - Release it to competing threads auto request_time = Walltime(); output.set_compute_time(static_cast(compute_time)); output.set_total_time(static_cast(request_time)); this->FinishResponse(); // The Response is now sending; Record some metrics and be done inf_compute.Add({{"model", model->Name()}}, quantiles).Observe(compute_time * 1000); inf_request.Add({{"model", model->Name()}}, quantiles).Observe(request_time * 1000); inf_load_ratio.Observe(request_time / compute_time); }); }); } void WriteBatchPredictions(RequestType& input, ResponseType& output, float* scores) { int N = input.batch_size(); auto nClasses = GetResources()->GetModel("flowers")->GetBinding(1).elementsPerBatchItem; size_t cntr = 0; for(int p = 0; p < N; p++) { auto element = output.add_elements(); /* Customize the post-processing of the output tensor *\ float max_val = -1.0; int max_idx = -1; for (int i = 0; i < nClasses; i++) { if (max_val < scores[cntr]) { max_val = scores[cntr]; max_idx = i; } cntr++; } auto top1 = element->add_predictions(); top1->set_class_id(max_idx); top1->set_score(max_val); \* Customize the post-processing of the output tensor */ } output.set_batch_id(input.batch_id()); } }; static bool ValidateEngine(const char* flagname, const std::string& value) { struct stat buffer; return (stat(value.c_str(), &buffer) == 0); } static bool ValidateBytes(const char* flagname, const std::string& value) { trtlab::StringToBytes(value); return true; } DEFINE_string(engine, "/path/to/tensorrt.engine", "TensorRT serialized engine"); DEFINE_validator(engine, &ValidateEngine); DEFINE_string(dataset, "127.0.0.1:4444", "GRPC Dataset/SharedMemory Service Address"); DEFINE_int32(contexts, 1, "Number of Execution Contexts"); DEFINE_int32(buffers, 0, "Number of Input/Output Buffers"); DEFINE_string(runtime, "default", "TensorRT Runtime"); DEFINE_int32(execution_threads, 1, "Number of RPC execution threads"); DEFINE_int32(preprocessing_threads, 0, "Number of preprocessing threads"); DEFINE_int32(kernel_launching_threads, 1, "Number of threads to launch CUDA kernels"); DEFINE_int32(postprocessing_threads, 2, "Number of postprocessing threads"); DEFINE_string(max_recv_bytes, "10MiB", "Maximum number of bytes for incoming messages"); DEFINE_validator(max_recv_bytes, &ValidateBytes); DEFINE_int32(port, 50051, "Port to listen for gRPC requests"); DEFINE_int32(metrics, 50078, "Port to expose metrics for scraping"); int main(int argc, char* argv[]) { FLAGS_alsologtostderr = 1; // Log to console ::google::InitGoogleLogging("flowers"); ::google::ParseCommandLineFlags(&argc, &argv, true); // Set CPU Affinity to be near the GPU auto cpus = DeviceInfo::Affinity(0); Affinity::SetAffinity(cpus); // Enable metrics on port Metrics::Initialize(FLAGS_metrics); // Create a gRPC server bound to IP:PORT std::ostringstream ip_port; ip_port << "0.0.0.0:" << FLAGS_port; Server server(ip_port.str()); // Modify MaxReceiveMessageSize auto bytes = trtlab::StringToBytes(FLAGS_max_recv_bytes); server.Builder().SetMaxReceiveMessageSize(bytes); LOG(INFO) << "gRPC MaxReceiveMessageSize = " << 
trtlab::BytesToString(bytes);

    // A server can host multiple services
    LOG(INFO) << "Register Service (flowers::Inference) with Server";
    auto inferenceService = server.RegisterAsyncService<Inference::AsyncService>();

    // An RPC has two components that need to be specified when registering with the service:
    // 1) Type of Execution Context (FlowersContext). The execution context defines the behavior
    //    of the RPC, i.e. it contains the control logic for the execution of the RPC.
    // 2) The Request function (RequestCompute) which was generated by gRPC when compiling the
    //    protobuf which defined the service. This function is responsible for queuing the
    //    RPC's execution context to the gRPC runtime.
    LOG(INFO) << "Register RPC (flowers::Inference::Compute) with Service (flowers::Inference)";
    auto rpcCompute =
        inferenceService->RegisterRPC<FlowersContext>(&Inference::AsyncService::RequestCompute);

    // Buffers default to execution contexts + 2
    // Allows for 1 H2D, N TensorRT Executions, 1 D2H to be inflight
    auto buffers = FLAGS_buffers;
    if(buffers == 0) buffers = FLAGS_contexts + 2;

    // Initialize Resources
    LOG(INFO) << "Initializing Resources for RPC (flowers::Inference::Compute)";
    auto rpcResources = std::make_shared<FlowersResources>(
        FLAGS_contexts, // number of IExecutionContexts - scratch space for DNN activations
        buffers, // number of host/device buffers for input/output tensors
        FLAGS_kernel_launching_threads, // number of threads used to execute cuda kernel launches
        FLAGS_postprocessing_threads, // number of threads used to write and complete responses
        GetSharedMemory(FLAGS_dataset) // pointer to data in shared memory
    );

    std::shared_ptr<Runtime> runtime;
    if(FLAGS_runtime == "default")
    {
        runtime = std::make_shared<StandardRuntime>();
    }
    else if(FLAGS_runtime == "unified")
    {
        runtime = std::make_shared<ManagedRuntime>();
    }
    else
    {
        LOG(FATAL) << "Invalid TensorRT Runtime";
    }

    rpcResources->RegisterModel("flowers", runtime->DeserializeEngine(FLAGS_engine));
    rpcResources->AllocateResources();

    // Create Executors - Executors provide the messaging processing resources for the RPCs
    LOG(INFO) << "Initializing Executor";
    auto executor = server.RegisterExecutor(new Executor(1));

    // You can register RPC execution contexts from any registered RPC on any executor.
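    // The context count below (100) bounds how many Compute RPCs can be in some stage
    // of execution at once on this server; additional incoming requests wait in gRPC's
    // queue until a context is recycled via FinishResponse().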
LOG(INFO) << "Registering Execution Contexts for RPC (flowers::Inference::Compute) with Executor"; executor->RegisterContexts(rpcCompute, rpcResources, 100); LOG(INFO) << "Running Server"; server.Run(std::chrono::milliseconds(2000), [] { // Query GPU Power nvmlDevice_t gpu; unsigned int power; CHECK_EQ(nvmlDeviceGetHandleByIndex(0, &gpu), NVML_SUCCESS) << "Failed to get Device for index=" << 0; CHECK_EQ(nvmlDeviceGetPowerUsage(gpu, &power), NVML_SUCCESS) << "Failed to get Power Usage for GPU=" << 0; power_gauge.Set((double)power * 0.001); }); } static auto pinned_memory = std::make_unique>(1024 * 1024 * 1024); float* GetSharedMemory(const std::string& address) { /* data in shared memory should go here - for the sake of quick examples just use and emptry * array */ pinned_memory->Fill((char)0); return (float*)pinned_memory->Data(); // the following code connects to a shared memory service to allow for non-serialized transfers // between microservices /* InfoRequest request; Info reply; grpc::ClientContext context; auto channel = grpc::CreateChannel(address, grpc::InsecureChannelCredentials()); auto stub = SharedMemoryDataSet::NewStub(channel); auto status = stub->GetInfo(&context, request, &reply); CHECK(status.ok()) << "Dataset shared memory request failed"; DLOG(INFO) << "SysV ShmKey: " << reply.sysv_key(); int shmid = shmget(reply.sysv_key(), 0, 0); DLOG(INFO) << "SysV ShmID: " << shmid; float* data = (float*) shmat(shmid, 0, 0); CHECK(data) << "SysV Attached failed"; return data; */ } ================================================ FILE: examples/02_TensorRT_GRPC/src/siege.cc ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Original Copyright proivded below. * This work extends the original gRPC client examples to work with the * implemented server. * * Copyright 2015 gRPC authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ #include #include #include #include #include #include #include #include #include #include "inference.grpc.pb.h" #include "tensorrt/laboratory/core/utils.h" using grpc::Channel; using grpc::ClientAsyncResponseReader; using grpc::ClientContext; using grpc::CompletionQueue; using grpc::Status; using ssd::BatchInput; using ssd::BatchPredictions; using ssd::Inference; static int g_BatchSize = 1; class GreeterClient { public: explicit GreeterClient(std::shared_ptr channel, int max_outstanding) : stub_(Inference::NewStub(channel)), m_OutstandingMessageCount(0), m_MaxOutstandingMessageCount(max_outstanding) { } // Assembles the client's payload and sends it to the server. void SayHello(const size_t batch_id, const int batch_size, char* bytes, uint64_t total) { // Data we are sending to the server. { std::unique_lock lock(m_Mutex); m_OutstandingMessageCount++; while(m_OutstandingMessageCount >= m_MaxOutstandingMessageCount) { LOG_FIRST_N(WARNING, 10) << "Initiated Backoff - (Siege Rate > Server Compute " "Rate) - Server Queues are full."; m_Condition.wait(lock); } } auto start = std::chrono::high_resolution_clock::now(); BatchInput request; request.set_batch_id(batch_id); request.set_batch_size(batch_size); if(total) { request.set_data(bytes, total); } // Call object to store rpc data AsyncClientCall* call = new AsyncClientCall; // stub_->PrepareAsyncSayHello() creates an RPC object, returning // an instance to store in "call" but does not actually start the RPC // Because we are using the asynchronous API, we need to hold on to // the "call" instance in order to get updates on the ongoing RPC. call->response_reader = stub_->PrepareAsyncCompute(&call->context, request, &cq_); // StartCall initiates the RPC call call->response_reader->StartCall(); // Request that, upon completion of the RPC, "reply" be updated with the // server's response; "status" with the indication of whether the operation // was successful. Tag the request with the memory address of the call object. call->response_reader->Finish(&call->reply, &call->status, (void*)call); float elapsed = std::chrono::duration(std::chrono::high_resolution_clock::now() - start).count(); m_RequestCalls++; m_TotalRequestTime += elapsed; // LOG_EVERY_N(INFO, 200) << "Request overhead: " << m_TotalRequestTime/m_RequestCalls; } // Loop while listening for completed responses. // Prints out the response from the server. void AsyncCompleteRpc() { void* got_tag; bool ok = false; size_t cntr = 0; auto start = std::chrono::steady_clock::now(); float last = 0.0; // Block until the next result is available in the completion queue "cq". while(cq_.Next(&got_tag, &ok)) { // The tag in this example is the memory location of the call object AsyncClientCall* call = static_cast(got_tag); // Verify that the request was completed successfully. Note that "ok" // corresponds solely to the request for updates introduced by Finish(). GPR_ASSERT(ok); if(call->status.ok()) { // std::cout << "Greeter received: " << call->reply.batch_id() << std::endl; } else { std::cout << "RPC failed" << std::endl; } // Once we're complete, deallocate the call object. 
delete call; cntr++; float elapsed = std::chrono::duration(std::chrono::steady_clock::now() - start).count(); if(elapsed - last > 0.5) { LOG(INFO) << "avg. rate: " << (float)cntr / (elapsed - last) << "( " << (float)(cntr * g_BatchSize) / (elapsed - last) << " inf/sec)"; last = elapsed; cntr = 0; } { std::unique_lock lock(m_Mutex); m_OutstandingMessageCount--; } m_Condition.notify_one(); } } void Shutdown() { cq_.Shutdown(); } private: // struct for keeping state and data information struct AsyncClientCall { // Container for the data we expect from the server. BatchPredictions reply; // Context for the client. It could be used to convey extra information to // the server and/or tweak certain RPC behaviors. ClientContext context; // Storage for the status of the RPC upon completion. Status status; std::unique_ptr> response_reader; }; // Out of the passed in Channel comes the stub, stored here, our view of the // server's exposed services. std::unique_ptr stub_; // The producer-consumer queue we use to communicate asynchronously with the // gRPC runtime. CompletionQueue cq_; // mutex to help control rate std::mutex m_Mutex; std::condition_variable m_Condition; int m_OutstandingMessageCount; int m_MaxOutstandingMessageCount; float m_TotalRequestTime; size_t m_RequestCalls; }; static bool ValidateBytes(const char* flagname, const std::string& value) { trtlab::StringToBytes(value); return true; } DEFINE_int32(count, 1000000, "number of grpc messages to send"); DEFINE_int32(batch_size, 1, "batch_size"); DEFINE_int32(max_outstanding, 950, "maximum outstanding requests"); DEFINE_int32(port, 50051, "server_port"); DEFINE_double(rate, 1.0, "messages per second"); DEFINE_double(max_rate, 100000, "maximum number of messages per second when func is applied"); DEFINE_double(alpha, 0, "alpha"); DEFINE_double(beta, 1, "beta"); DEFINE_string(func, "constant", "constant, linear or cyclic"); DEFINE_string(bytes, "0B", "add extra bytes to the request payload"); DEFINE_validator(bytes, &ValidateBytes); int main(int argc, char** argv) { FLAGS_alsologtostderr = 1; // It will dump to console ::google::ParseCommandLineFlags(&argc, &argv, true); g_BatchSize = FLAGS_batch_size; auto bytes = trtlab::StringToBytes(FLAGS_bytes); char extra_bytes[bytes]; if(bytes) LOG(INFO) << "Sending an addition " << trtlab::BytesToString(bytes) << " bytes in request payload"; // using a fixed rate of 15us per rpc call. i could adjust dynamically as i'm tracking // the call overhead, but it's close enough. auto start = std::chrono::system_clock::now(); auto walltime = [start]() -> double { return std::chrono::duration(std::chrono::system_clock::now() - start).count(); }; std::map> rates_by_name; rates_by_name["constant"] = []() -> double { return std::min(FLAGS_rate, FLAGS_max_rate); }; rates_by_name["linear"] = [start, walltime]() -> double { return std::min(FLAGS_rate + (FLAGS_alpha / 60.0) * walltime(), FLAGS_max_rate); }; rates_by_name["cyclic"] = [start, walltime]() -> double { return std::min(FLAGS_rate + FLAGS_alpha * std::sin(2.0 * 3.14159 * (FLAGS_beta / 60.0) * walltime()), FLAGS_max_rate); }; auto search = rates_by_name.find(FLAGS_func); if(search == rates_by_name.end()) { LOG(FATAL) << "--func must be constant, linear or cyclic; your value = " << FLAGS_func; } auto sleepy = [search]() -> double { auto sleep_time = ((std::chrono::seconds(1) / std::max((search->second)(), 2.0))) - std::chrono::microseconds(15); return std::chrono::duration(sleep_time).count(); }; // Instantiate the client. 
It requires a channel, out of which the actual RPCs // are created. This channel models a connection to an endpoint (in this case, // localhost at port 50051). We indicate that the channel isn't authenticated // (use of InsecureChannelCredentials()). std::ostringstream ip_port; ip_port << "localhost:" << FLAGS_port; grpc::ChannelArguments ch_args; ch_args.SetMaxReceiveMessageSize(-1); GreeterClient greeter( grpc::CreateCustomChannel(ip_port.str(), grpc::InsecureChannelCredentials(), ch_args), FLAGS_max_outstanding); // Spawn reader thread that loops indefinitely std::thread thread_ = std::thread(&GreeterClient::AsyncCompleteRpc, &greeter); for(size_t i = 0; i < FLAGS_count; i++) { greeter.SayHello(i, FLAGS_batch_size, extra_bytes, bytes); // The actual RPC call! auto start = std::chrono::high_resolution_clock::now(); while(std::chrono::duration(std::chrono::high_resolution_clock::now() - start) .count() < sleepy()) { std::this_thread::yield(); } } greeter.Shutdown(); thread_.join(); // blocks forever auto elapsed = walltime(); std::cout << FLAGS_count << " requests in " << elapsed << "seconds; inf/sec: " << FLAGS_count * FLAGS_batch_size / elapsed << std::endl; return 0; } ================================================ FILE: examples/02_TensorRT_GRPC/src/sync-client.cc ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Original Copyright proivded below. * This work extends the original gRPC client examples to work with the * implemented server. * * Copyright 2015 gRPC authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. * */ #include #include #include #include #include #include #include #include "inference.grpc.pb.h" using grpc::Channel; using grpc::ClientContext; using grpc::Status; using ssd::BatchInput; using ssd::BatchPredictions; using ssd::Inference; class SimpleClient { public: SimpleClient(std::shared_ptr channel) : stub_(Inference::NewStub(channel)) {} // Assembles the client's payload, sends it and presents the response back // from the server. int Compute(const int batch_id, const int batch_size) { // Data we are sending to the server. BatchInput request; request.set_batch_id(batch_id); request.set_batch_size(batch_size); // Container for the data we expect from the server. BatchPredictions reply; // Context for the client. It could be used to convey extra information to // the server and/or tweak certain RPC behaviors. ClientContext context; // The actual RPC. Status status = stub_->Compute(&context, request, &reply); // Act upon its status. if(status.ok()) { return reply.batch_id(); } else { std::cout << status.error_code() << ": " << status.error_message() << std::endl; return -1; } } private: std::unique_ptr stub_; }; DEFINE_int32(count, 1000, "number of grpc messages to send"); DEFINE_int32(port, 50051, "server_port"); DEFINE_int32(batch, 1, "batch size"); int main(int argc, char** argv) { // Instantiate the client. It requires a channel, out of which the actual RPCs // are created. This channel models a connection to an endpoint (in this case, // localhost at port 50051). We indicate that the channel isn't authenticated // (use of InsecureChannelCredentials()). FLAGS_alsologtostderr = 1; // It will dump to console ::google::ParseCommandLineFlags(&argc, &argv, true); std::ostringstream ip_port; ip_port << "localhost:" << FLAGS_port; SimpleClient client(grpc::CreateChannel(ip_port.str(), grpc::InsecureChannelCredentials())); auto start = std::chrono::steady_clock::now(); for(int i = 0; i < FLAGS_count; i++) { auto reply = client.Compute(i, FLAGS_batch); if(reply == -1 || reply != i) std::cout << "BatchId received: " << reply << std::endl; } auto end = std::chrono::steady_clock::now(); float elapsed = std::chrono::duration(end - start).count(); std::cout << FLAGS_count << " requests in " << elapsed << " seconds; inf/sec: " << FLAGS_count * FLAGS_batch / elapsed << std::endl; return 0; } ================================================ FILE: examples/03_Batching/CMakeLists.txt ================================================ # Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of NVIDIA CORPORATION nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. 
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

add_executable(streaming-service-echo.x streaming-service.cc)
target_link_libraries(streaming-service-echo.x trtlab::nvrpc echo-protos gflags)

add_executable(batching-service-echo.x inference-batcher.cc)
target_link_libraries(batching-service-echo.x trtlab::nvrpc echo-protos gflags)

================================================
FILE: examples/03_Batching/README.md
================================================
# Batching Service

A batching service is a service that tries to collect sets of similar requests into a
collective batch which can be executed in a single shot.

#### Why do we want to batch?

In the case of Deep Neural Networks, batching can improve the computational efficiency of
executing on a GPU by increasing the operational intensity, i.e. improving the ratio of the
number of math operations per memory transaction. This translates to improved throughput,
better hardware utilization and cost reductions.

#### Sounds great, but what's the catch?

In many cases, batching can add latency to an individual request. Because a batch of more
than 1 item, BatchN, is computed as a single unit, the time to compute BatchN is greater
than Batch1. However, in many cases, the compute-time delta between Batch1 and Batch2/4/8 is
fairly small due to the improved operational efficiency. Secondly, because batching requires
requests to be collected, there is a timed collection window prior to the compute. The first
request in a batch sees the longest latency. The worst-case increased latency is bounded by
the following formula (a worked example with concrete numbers appears later in this README):

```
worst_additional_latency = batch_window_timeout + batchN_compute - batch1_compute
```

#### When to Batch?

You want to batch requests when your service has very high load and you can tolerate minor
increases in latency. Throughput improvements can be 2-5x, which translates into direct cost
savings.

#### What does this Batching Service do for me?

The basic YAIS service examples [01_GRPC](../01_GRPC) and [02_GRPC_TensorRT](../02_GRPC_TensorRT)
implement high-performance send/recv unary services. That is, the client sends a request
which is computed and a response is returned. The client could in theory create a single
message that is itself a batch, i.e. multiple image files or sentences to be translated.
However, in the most common real-world use cases, the clients of a service send a single
item at a time. This keeps both the client logic and the lifecycle of the request simple.

If this is your RPC definition,

```
service Inference {
    rpc Compute (Input) returns (Output) {}
}
```

then, instead of implementing `rpc Compute` to perform the inference computation, we hijack
that RPC and turn it into a batcher.
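To make the latency bound above concrete before walking through the implementation, assume
(illustrative numbers only) a 2.0 ms batching window, a 2.5 ms Batch1 compute, and a 4.0 ms
Batch8 compute:

```
worst_additional_latency = 2.0 + 4.0 - 2.5 = 3.5 ms
```

Meanwhile, throughput rises from 1 / 2.5 ms = 400 inf/sec to 8 / 4.0 ms = 2000 inf/sec, a 5x
improvement in exchange for at most 3.5 ms of added latency.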
In the [`inference-batcher.cc`](inference-batcher.cc) file, you will see that we implement
our batching service as the `Compute` method. The batching service collects incoming `Input`
requests and forwards them via a gRPC stream to a service that accepts a "batching stream".

A "batching stream" is a stream where the endpoint service reads and collects the elements
of the stream until the client signifies it is done writing. That is the signal at which
YAIS performs a single batched inference call on the concatenated set of requests that came
in over the stream. After the inference calculation is complete, the server writes the
results for each request item to the stream. That is, for each request that came in on the
stream, the server is expected to return a response.

We still need to compute inference on the batching stream. This is performed by
[streaming-service.cc](streaming-service.cc). The `streaming-service` implements the
`BatchedCompute` RPC method using a `BatchingContext`.

```
service Inference {
    rpc Compute (Input) returns (Output) {}
    rpc BatchedCompute (stream Input) returns (stream Output) {}
}
```

Because the stream consists of an array of individual messages, you simply need to make
minor modifications to your existing Batch1 service to preprocess and concat the incoming
requests together to form a single batch compute. For each `Input` item in the stream, it is
expected that the service writes an `Output` response in the same order as the inputs (FIFO).

The batching service doesn't need to know anything about the format of the `Input`/`Output`
messages. It simply accepts and forwards them. The result is that this batching service
example should be able to work with any unary gRPC service with any request/response
message. You simply need to implement a streaming service capable of handling the forwarding
stream.

## Running Example

```
./launch_batching.sh
```

```
... # streaming service startup
... # batching service startup
Starting a shell keeping the services and load-balancer running...
Try python unary_client.py - exit shell to kill services
Batching Subshell: python unary_client.py
I0822 14:48:18.900671 50 inference-batcher.cc:344] incoming unary request
I0822 14:48:18.902642 41 inference-batcher.cc:109] Client using CQ: 0x14470f0
I0822 14:48:18.902680 41 inference-batcher.cc:140] Starting Batch Forwarding of Size 1 for Tag 0x1458450
I0822 14:48:18.903472 35 streaming-service.cc:61] Recieved request with batch_id=78
I0822 14:48:18.903504 35 streaming-service.cc:54] Response with batch_id=78
I0822 14:48:18.903656 47 inference-batcher.cc:243] Batch Forwarding Completed for Tag 0x1458450
Received msg with batch_id=78
```

================================================
FILE: examples/03_Batching/inference-batcher.cc
================================================
/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include #include #include #include

#include "nvrpc/context.h"
#include "nvrpc/executor.h"
#include "nvrpc/server.h"
#include "tensorrt/laboratory/core/thread_pool.h"

using nvrpc::Context;
using nvrpc::Executor;
using nvrpc::Server;
using trtlab::ThreadPool;

#include "moodycamel/blockingconcurrentqueue.h"

using moodycamel::BlockingConcurrentQueue;
using moodycamel::ConsumerToken;
using moodycamel::ProducerToken;

#include "echo.grpc.pb.h"
#include "echo.pb.h"

/**
 * @brief Batching Service for Unary Requests
 *
 * Exposes a Unary (send/recv) interface for a given RPC, but rather than
 * computing the RPC, the service simply batches the incoming requests and
 * forwards them via a gRPC stream to a service that implements the actual
 * compute portion of the RPC.
 *
 * The backend compute service is not a Unary service. Rather, it must
 * implement the LifeCycleBatching service Context, i.e. BatchingContext.
 * The other application in this folder implements the backend service.
 *
 * Streams are used as a forwarding mechanism because of how they interact
 * with a load-balancer. Unlike unary requests, which get balanced on each
 * request, a stream only gets balanced when it is opened. All items of a
 * stream go to the same endpoint service.
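 *
 * (For example, behind an L7 balancer such as Envoy, each forwarded batch is
 * one stream, so load is balanced per-batch rather than per-item; see the
 * 99_LoadBalancer example.)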
 * * @tparam ServiceType * @tparam Request * @tparam Response */ template<typename ServiceType, typename Request, typename Response> struct BatchingService { using Callback = std::function<void(bool)>; struct MessageType { Request* request; Response* response; Callback callback; }; /** * @brief Forwards incoming Unary requests via a gRPC Stream to * a Batched Streaming Service that implements the actual RPC */ class Client { public: using PrepareFunc = std::function<std::unique_ptr<::grpc::ClientAsyncReaderWriter<Request, Response>>( ::grpc::ClientContext*, ::grpc::CompletionQueue*)>; Client(PrepareFunc prepare_func, std::shared_ptr<ThreadPool> thread_pool) : m_PrepareFunc(prepare_func), m_ThreadPool(thread_pool), m_CurrentCQ(0) { for(decltype(m_ThreadPool->Size()) i = 0; i < m_ThreadPool->Size(); i++) { LOG(INFO) << "Starting Client Progress Engine #" << i; m_CQs.emplace_back(new ::grpc::CompletionQueue); auto cq = m_CQs.back().get(); m_ThreadPool->enqueue([this, cq] { ProgressEngine(*cq); }); } } void WriteAndCloseStream(uint32_t messages_count, MessageType* messages) { auto cq = m_CQs[++m_CurrentCQ % m_CQs.size()].get(); LOG(INFO) << "Client using CQ: " << (void*)cq; auto ctx = new Call; for(uint32_t i = 0; i < messages_count; i++) { ctx->Push(messages[i]); } ctx->m_Stream = m_PrepareFunc(&ctx->m_Context, cq); ctx->Start(); } private: class Call { public: Call() : m_Started(false), m_NextState(&Call::StateInvalid) {} virtual ~Call() {} void Push(MessageType& message) { if(m_Started) LOG(FATAL) << "Stream started; No pushing allowed."; m_Requests.push(message.request); m_Responses.push(message.response); m_CallbackByResponse[message.response] = message.callback; } void Start() { LOG(INFO) << "Starting Batch Forwarding of Size " << m_Requests.size() << " for Tag " << Tag(); m_Started = true; // no more pushes once the stream is started m_NextState = &Call::StateWriteDone; m_Stream->StartCall(Tag()); } private: bool RunNextState(bool ok) { bool ret = (this->*m_NextState)(ok); if(!ret) DLOG(INFO) << "RunNextState returning false"; return ret; } void* Tag() { return static_cast<void*>(this); } bool Fail() { LOG(FATAL) << "Fail"; return false; } void WriteNext() { if(m_Requests.size()) { auto request = m_Requests.front(); m_Requests.pop(); DLOG(INFO) << "forwarding request"; m_NextState = &Call::StateWriteDone; m_Stream->Write(*request, Tag()); } else { DLOG(INFO) << "closing client stream for writing"; m_NextState = &Call::StateCloseStreamDone; m_Stream->WritesDone(Tag()); } } void ReadNext() { if(m_Responses.size()) { DLOG(INFO) << "waiting on response"; auto response = m_Responses.front(); m_NextState = &Call::StateReadDone; m_Stream->Read(response, Tag()); } else { DLOG(INFO) << "waiting on finished message from server"; m_NextState = &Call::StateFinishedDone; m_Stream->Finish(&m_Status, Tag()); } } bool StateWriteDone(bool ok) { if(!ok) return Fail(); DLOG(INFO) << "request forwarded!"; WriteNext(); return true; } bool StateReadDone(bool ok) { if(!ok) return Fail(); DLOG(INFO) << "response received"; auto response = m_Responses.front(); m_Responses.pop(); auto search = m_CallbackByResponse.find(response); if(search == m_CallbackByResponse.end()) LOG(FATAL) << "Callback for response not found"; ReadNext(); // Execute callback which will complete the unary request for this stream item DLOG(INFO) << "triggering callback on held receive context"; search->second(true); DLOG(INFO) << "callback completed"; return true; } bool StateCloseStreamDone(bool ok) { if(!ok) return Fail(); DLOG(INFO) << "closed client stream for writing"; ReadNext(); return true; } bool StateFinishedDone(bool ok) { if(m_Status.ok()) DLOG(INFO) << "ClientContext: " << Tag() << " finished with OK"; else DLOG(INFO) << "ClientContext: " <<
Tag() << " finished with CANCELLED"; m_NextState = &Call::StateInvalid; LOG(INFO) << "Batch Forwarding Completed for Tag " << Tag(); return false; } bool StateInvalid(bool ok) { LOG(FATAL) << "This should never be called"; } private: std::queue m_Requests; std::queue m_Responses; std::map m_CallbackByResponse; bool (Call::*m_NextState)(bool); ::grpc::Status m_Status; ::grpc::ClientContext m_Context; std::unique_ptr<::grpc::ClientAsyncReaderWriter> m_Stream; bool m_Started; friend class Client; }; void ProgressEngine(::grpc::CompletionQueue& cq) { void* tag; bool ok = false; while(cq.Next(&tag, &ok)) { CHECK(ok) << "not ok"; Call* call = static_cast(tag); if(!call->RunNextState(ok)) { DLOG(INFO) << "Deleting Stream: " << tag; delete call; } } } int m_CurrentCQ; PrepareFunc m_PrepareFunc; std::shared_ptr m_ThreadPool; std::vector> m_CQs; }; class Resources : public ::trtlab::Resources { public: Resources(uint32_t max_batch_size, uint64_t timeout, std::shared_ptr client) : m_MaxBatchsize(max_batch_size), m_Timeout(timeout), m_Client(client) { } virtual void PreprocessRequest(Request* req) {} void Push(Request* req, Response* resp, Callback callback) { // thread_local ProducerToken token(m_MessageQueue); // m_MessageQueue.enqueue(token, MessageType(req, resp, callback)); PreprocessRequest(req); m_MessageQueue.enqueue(MessageType{req, resp, callback}); } void ProgressEngine() { constexpr uint64_t quanta = 100; const double timeout = static_cast(m_Timeout - quanta) / 1000000.0; size_t total_count; size_t max_batch; std::vector messages; messages.resize(m_MaxBatch) thread_local ConsumerToken token(m_MessageQueue); for(;;) { max_batch = m_MaxBatchsize; total_count = 0; auto start = std::chrono::steady_clock::now(); auto elapsed = [start]() -> double { return std::chrono::duration(std::chrono::steady_clock::now() - start) .count(); }; // initial pull - if not successful, restart loop // if successful, then open a stream, push message to stream and continue to collect // requests until the max_batch_size is reach for the timeout is triggered // finish sending // r do { auto count = m_MessageQueue.wait_dequeue_bulk_timed( token, &messages[total_count], max_batch, quanta); total_count += count; max_batch -= count; } while(total_count && total_count < m_MaxBatchsize && elapsed() < timeout); if(total_count) { m_Client->WriteAndCloseStream(total_count, messages); } } } private: size_t m_MaxBatchsize; uint64_t m_Timeout; std::shared_ptr m_Client; BlockingConcurrentQueue m_MessageQueue; }; class ReceiveContext final : public ::nvrpc::Context { void ExecuteRPC(Request& request, Response& response) final override { LOG(INFO) << "incoming unary request"; this->GetResources()->Push(&request, &response, [this](bool ok) { if(ok) this->FinishResponse(); else { LOG(INFO) << "shoot"; this->CancelResponse(); } }); } }; }; DEFINE_uint32(max_batch_size, 8, "Maximum batch size to collect and foward"); DEFINE_uint64(timeout_usecs, 2000, "Batching window timeout in microseconds"); DEFINE_uint32(max_batches_in_flight, 1, "Maximum number of forwarded batches"); DEFINE_uint32(receiving_threads, 1, "Number of Forwarding threads"); DEFINE_uint32(forwarding_threads, 1, "Number of Forwarding threads"); DEFINE_string(forwarding_target, "localhost:50051", "Batched Compute Service / Load-Balancer"); using InferenceBatchingService = BatchingService; int main(int argc, char* argv[]) { FLAGS_alsologtostderr = 1; // Log to console ::google::InitGoogleLogging("simpleBatchingService"); ::google::ParseCommandLineFlags(&argc, &argv, 
true); auto forwarding_threads = std::make_shared<ThreadPool>(FLAGS_forwarding_threads); auto channel = grpc::CreateChannel(FLAGS_forwarding_target, grpc::InsecureChannelCredentials()); auto stub = ::simple::Inference::NewStub(channel); auto forwarding_prepare_func = [&stub](::grpc::ClientContext * context, ::grpc::CompletionQueue * cq) -> auto { return std::move(stub->PrepareAsyncBatchedCompute(context, cq)); }; auto client = std::make_shared<InferenceBatchingService::Client>(forwarding_prepare_func, forwarding_threads); auto rpcResources = std::make_shared<InferenceBatchingService::Resources>( FLAGS_max_batch_size, FLAGS_timeout_usecs, client); Server server("0.0.0.0:50049"); auto recvService = server.RegisterAsyncService<::simple::Inference>(); auto rpcCompute = recvService->RegisterRPC<InferenceBatchingService::ReceiveContext>( &::simple::Inference::AsyncService::RequestCompute); uint64_t context_count = FLAGS_max_batch_size * FLAGS_max_batches_in_flight; uint64_t contexts_per_executor_thread = std::max(context_count / FLAGS_receiving_threads, 1UL); auto executor = server.RegisterExecutor(new Executor(FLAGS_receiving_threads)); executor->RegisterContexts(rpcCompute, rpcResources, contexts_per_executor_thread); LOG(INFO) << "Running Server"; server.Run(std::chrono::milliseconds(1), [rpcResources] { rpcResources->ProgressEngine(); }); return 0; }
================================================ FILE: examples/03_Batching/launch_batching.sh ================================================
#!/bin/bash -e # # Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of NVIDIA CORPORATION nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # cleanup() { kill $(jobs -p) ||: } trap "cleanup" EXIT SIGINT SIGTERM sleep 1 echo "starting streaming services" /work/build/examples/03_Batching/streaming-service-echo.x & wait-for-it.sh localhost:50051 --timeout=0 -- echo "Streaming service is ready." echo "starting batching service" /work/build/examples/03_Batching/batching-service-echo.x & wait-for-it.sh localhost:50049 --timeout=0 -- echo "Batching service is ready." echo echo "Starting a shell keeping the services and load-balancer running..."
echo "Try python unary_client.py - exit shell to kill services" bash --rcfile <(echo "PS1='Batching Subshell: '") ================================================ FILE: examples/03_Batching/simple_batching_client.py ================================================ import grpc import simple_pb2 import simple_pb2_grpc def run(): with grpc.insecure_channel('localhost:50051') as channel: stub = simple_pb2_grpc.InferenceStub(channel) def requests(): messages = [simple_pb2.Input(batch_id=i) for i in range(10)] for msg in messages: print("Sending Stream batch_id={}".format(msg.batch_id)) yield msg responses = stub.BatchedCompute(requests()) for resp in responses: print("Received msg on stream with batch_id={}".format(resp.batch_id)) if __name__ == "__main__": run() ================================================ FILE: examples/03_Batching/simple_pb2.py ================================================ # Generated by the protocol buffer compiler. DO NOT EDIT! # source: simple.proto import sys _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database from google.protobuf import descriptor_pb2 # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() DESCRIPTOR = _descriptor.FileDescriptor( name='simple.proto', package='simple', syntax='proto3', serialized_pb=_b('\n\x0csimple.proto\x12\x06simple\"\x19\n\x05Input\x12\x10\n\x08\x62\x61tch_id\x18\x01 \x01(\x04\"\x1a\n\x06Output\x12\x10\n\x08\x62\x61tch_id\x18\x01 \x01(\x04\x32n\n\tInference\x12*\n\x07\x43ompute\x12\r.simple.Input\x1a\x0e.simple.Output\"\x00\x12\x35\n\x0e\x42\x61tchedCompute\x12\r.simple.Input\x1a\x0e.simple.Output\"\x00(\x01\x30\x01\x62\x06proto3') ) _INPUT = _descriptor.Descriptor( name='Input', full_name='simple.Input', filename=None, file=DESCRIPTOR, containing_type=None, fields=[ _descriptor.FieldDescriptor( name='batch_id', full_name='simple.Input.batch_id', index=0, number=1, type=4, cpp_type=4, label=1, has_default_value=False, default_value=0, message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None, file=DESCRIPTOR), ], extensions=[ ], nested_types=[], enum_types=[ ], options=None, is_extendable=False, syntax='proto3', extension_ranges=[], oneofs=[ ], serialized_start=24, serialized_end=49, ) _OUTPUT = _descriptor.Descriptor( name='Output', full_name='simple.Output', filename=None, file=DESCRIPTOR, containing_type=None, fields=[ _descriptor.FieldDescriptor( name='batch_id', full_name='simple.Output.batch_id', index=0, number=1, type=4, cpp_type=4, label=1, has_default_value=False, default_value=0, message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None, file=DESCRIPTOR), ], extensions=[ ], nested_types=[], enum_types=[ ], options=None, is_extendable=False, syntax='proto3', extension_ranges=[], oneofs=[ ], serialized_start=51, serialized_end=77, ) DESCRIPTOR.message_types_by_name['Input'] = _INPUT DESCRIPTOR.message_types_by_name['Output'] = _OUTPUT _sym_db.RegisterFileDescriptor(DESCRIPTOR) Input = _reflection.GeneratedProtocolMessageType('Input', (_message.Message,), dict( DESCRIPTOR = _INPUT, __module__ = 'simple_pb2' # @@protoc_insertion_point(class_scope:simple.Input) )) _sym_db.RegisterMessage(Input) Output = _reflection.GeneratedProtocolMessageType('Output', 
(_message.Message,), dict( DESCRIPTOR = _OUTPUT, __module__ = 'simple_pb2' # @@protoc_insertion_point(class_scope:simple.Output) )) _sym_db.RegisterMessage(Output) _INFERENCE = _descriptor.ServiceDescriptor( name='Inference', full_name='simple.Inference', file=DESCRIPTOR, index=0, options=None, serialized_start=79, serialized_end=189, methods=[ _descriptor.MethodDescriptor( name='Compute', full_name='simple.Inference.Compute', index=0, containing_service=None, input_type=_INPUT, output_type=_OUTPUT, options=None, ), _descriptor.MethodDescriptor( name='BatchedCompute', full_name='simple.Inference.BatchedCompute', index=1, containing_service=None, input_type=_INPUT, output_type=_OUTPUT, options=None, ), ]) _sym_db.RegisterServiceDescriptor(_INFERENCE) DESCRIPTOR.services_by_name['Inference'] = _INFERENCE # @@protoc_insertion_point(module_scope) ================================================ FILE: examples/03_Batching/simple_pb2_grpc.py ================================================ # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! import grpc import simple_pb2 as simple__pb2 class InferenceStub(object): # missing associated documentation comment in .proto file pass def __init__(self, channel): """Constructor. Args: channel: A grpc.Channel. """ self.Compute = channel.unary_unary( '/simple.Inference/Compute', request_serializer=simple__pb2.Input.SerializeToString, response_deserializer=simple__pb2.Output.FromString, ) self.BatchedCompute = channel.stream_stream( '/simple.Inference/BatchedCompute', request_serializer=simple__pb2.Input.SerializeToString, response_deserializer=simple__pb2.Output.FromString, ) class InferenceServicer(object): # missing associated documentation comment in .proto file pass def Compute(self, request, context): # missing associated documentation comment in .proto file pass context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') def BatchedCompute(self, request_iterator, context): # missing associated documentation comment in .proto file pass context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') def add_InferenceServicer_to_server(servicer, server): rpc_method_handlers = { 'Compute': grpc.unary_unary_rpc_method_handler( servicer.Compute, request_deserializer=simple__pb2.Input.FromString, response_serializer=simple__pb2.Output.SerializeToString, ), 'BatchedCompute': grpc.stream_stream_rpc_method_handler( servicer.BatchedCompute, request_deserializer=simple__pb2.Input.FromString, response_serializer=simple__pb2.Output.SerializeToString, ), } generic_handler = grpc.method_handlers_generic_handler( 'simple.Inference', rpc_method_handlers) server.add_generic_rpc_handlers((generic_handler,)) ================================================ FILE: examples/03_Batching/streaming-service.cc ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. 
 * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include <chrono> #include <vector> #include <gflags/gflags.h> #include <glog/logging.h> #include "nvrpc/context.h" #include "nvrpc/executor.h" #include "nvrpc/rpc.h" #include "nvrpc/server.h" #include "nvrpc/service.h" #include "tensorrt/laboratory/core/resources.h" #include "tensorrt/laboratory/core/thread_pool.h" using nvrpc::AsyncRPC; using nvrpc::AsyncService; using nvrpc::BatchingContext; using nvrpc::Executor; using nvrpc::Server; using trtlab::Resources; using trtlab::ThreadPool; #include "echo.grpc.pb.h" #include "echo.pb.h" class SimpleContext final : public BatchingContext<simple::Input, simple::Output, Resources> { void ExecuteRPC(std::vector<simple::Input>& inputs, std::vector<simple::Output>& outputs) final override { for(auto input = inputs.cbegin(); input != inputs.cend(); input++) { auto output = outputs.emplace(outputs.end()); output->set_batch_id(input->batch_id()); LOG(INFO) << "Response with batch_id=" << output->batch_id(); } this->FinishResponse(); } void OnRequestReceived(const RequestType& request) final override { LOG(INFO) << "Received request with batch_id=" << request.batch_id(); } }; int main(int argc, char* argv[]) { FLAGS_alsologtostderr = 1; // Log to console ::google::InitGoogleLogging("simpleServer"); ::google::ParseCommandLineFlags(&argc, &argv, true); Server server("0.0.0.0:50051"); LOG(INFO) << "Register Service (simple::Inference)"; auto simpleInference = server.RegisterAsyncService<simple::Inference>(); LOG(INFO) << "Register RPC (simple::Inference::BatchedCompute) with Service (simple::Inference)"; auto rpcCompute = simpleInference->RegisterRPC<SimpleContext>( &simple::Inference::AsyncService::RequestBatchedCompute); LOG(INFO) << "Initializing Resources for RPC (simple::Inference::BatchedCompute)"; auto rpcResources = std::make_shared<Resources>(); LOG(INFO) << "Creating Executor"; auto executor = server.RegisterExecutor(new Executor(1)); LOG(INFO) << "Creating Execution Contexts for RPC (simple::Inference::BatchedCompute) with Executor"; executor->RegisterContexts(rpcCompute, rpcResources, 10); LOG(INFO) << "Running Server"; server.Run(std::chrono::milliseconds(2000), [] { // This is a timeout loop executed every 2 seconds. // Run() with no arguments will run an empty timeout loop every 5 seconds. // RunAsync() will return immediately; it's your responsibility to ensure the // server doesn't go out of scope or a Shutdown will be triggered on your services.
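        // A hypothetical sketch of the RunAsync variant described above (method
        // names taken from this comment, not verified against the nvrpc headers):
        //   server.RunAsync();   // returns immediately
        //   wait_for_sigterm();  // e.g. block the main thread on a signal
        //   // keep 'server' in scope while serving; letting it go out of scope
        //   // triggers the Shutdown described above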
}); } ================================================ FILE: examples/03_Batching/unary_client.py ================================================ import grpc import simple_pb2 import simple_pb2_grpc def run(): with grpc.insecure_channel('localhost:50049') as channel: stub = simple_pb2_grpc.InferenceStub(channel) response = stub.Compute(simple_pb2.Input(batch_id=78)) print("Received msg with batch_id={}".format(response.batch_id)) if __name__ == "__main__": run() ================================================ FILE: examples/04_Middleman/CMakeLists.txt ================================================ # Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of NVIDIA CORPORATION nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add_executable(middleman-client.x middleman-client.cc ) target_link_libraries(middleman-client.x trtlab::nvrpc nv-inference-protos gflags ) ================================================ FILE: examples/04_Middleman/middleman-client.cc ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include <chrono> #include <map> #include <memory> #include <vector> #include <gflags/gflags.h> #include <glog/logging.h> #include "tensorrt/laboratory/core/memory/allocator.h" #include "tensorrt/laboratory/core/memory/malloc.h" #include "tensorrt/laboratory/core/thread_pool.h" using trtlab::Allocator; using trtlab::Malloc; #include "nvrpc/context.h" #include "nvrpc/executor.h" #include "nvrpc/server.h" using nvrpc::Context; using nvrpc::Executor; using nvrpc::Server; using trtlab::ThreadPool; #include "moodycamel/blockingconcurrentqueue.h" using moodycamel::BlockingConcurrentQueue; using moodycamel::ConsumerToken; using moodycamel::ProducerToken; // NVIDIA Inference Server Protos #include "nvidia_inference.grpc.pb.h" #include "nvidia_inference.pb.h" namespace easter = ::nvidia::inferenceserver; /* using nvidia::inferenceserver::GRPCService; using nvidia::inferenceserver::InferRequest; using nvidia::inferenceserver::InferResponse; */ /** * @brief Batching Service for Unary Requests * * Exposes a Unary (send/recv) interface for a given RPC, but rather than * computing the RPC, the service simply batches the incoming requests and * forwards them via a gRPC stream to a service that implements the actual * compute portion of the RPC. * * The backend compute service is not a Unary service. Rather, it must * implement the LifeCycleBatching service Context, i.e. BatchingContext. * The other application in this folder implements the backend service. * * Streams are used as a forwarding mechanism because of how they interact * with a load-balancer. Unlike unary requests which get balanced on each * request, a stream only gets balanced when it is opened. All items of a stream * go to the same endpoint service.
 * * @tparam ServiceType * @tparam Request * @tparam Response */ template<typename ServiceType, typename Request, typename Response> struct MiddlemanService { using Callback = std::function<void(bool)>; struct MessageType { Request* request; Response* response; Callback callback; }; /** * @brief Forwards incoming Unary requests via a gRPC Stream to * a Batched Streaming Service that implements the actual RPC */ class Client { public: using PrepareFunc = std::function<std::unique_ptr<::grpc::ClientAsyncResponseReader<Response>>( ::grpc::ClientContext*, const Request&, ::grpc::CompletionQueue*)>; Client(PrepareFunc prepare_func, std::shared_ptr<ThreadPool> thread_pool) : m_PrepareFunc(prepare_func), m_ThreadPool(thread_pool), m_CurrentCQ(0) { for(decltype(m_ThreadPool->Size()) i = 0; i < m_ThreadPool->Size(); i++) { LOG(INFO) << "Starting Client Progress Engine #" << i; m_CQs.emplace_back(new ::grpc::CompletionQueue); auto cq = m_CQs.back().get(); m_ThreadPool->enqueue([this, cq] { ProgressEngine(*cq); }); } } void WriteAndCloseStream(uint32_t messages_count, MessageType* messages) { auto cq = m_CQs[++m_CurrentCQ % m_CQs.size()].get(); DLOG(INFO) << "Client using CQ: " << (void*)cq; CHECK_EQ(1U, messages_count) << "forwarder; not batcher"; auto ctx = new Call; for(uint32_t i = 0; i < messages_count; i++) { ctx->Push(messages[i]); } ctx->m_Reader = m_PrepareFunc(&ctx->m_Context, *ctx->m_Request, cq); ctx->m_Reader->StartCall(); ctx->m_Reader->Finish(ctx->m_Response, &ctx->m_Status, ctx->Tag()); } private: class Call { public: Call() : m_NextState(&Call::StateFinishedDone) {} virtual ~Call() {} void Push(MessageType& message) { m_Request = message.request; m_Response = message.response; m_Callback = message.callback; } private: bool RunNextState(bool ok) { bool ret = (this->*m_NextState)(ok); if(!ret) DLOG(INFO) << "RunNextState returning false"; return ret; } void* Tag() { return static_cast<void*>(this); } bool Fail() { LOG(FATAL) << "Fail"; return false; } bool StateFinishedDone(bool ok) { if(m_Status.ok()) DLOG(INFO) << "ClientContext: " << Tag() << " finished with OK"; else DLOG(INFO) << "ClientContext: " << Tag() << " finished with CANCELLED"; m_Callback(m_Status.ok()); DLOG(INFO) << "Forwarding Completed for Tag " << Tag(); return false; } private: Request* m_Request; Response* m_Response; Callback m_Callback; bool (Call::*m_NextState)(bool); ::grpc::Status m_Status; ::grpc::ClientContext m_Context; std::unique_ptr<::grpc::ClientAsyncResponseReader<Response>> m_Reader; friend class Client; }; void ProgressEngine(::grpc::CompletionQueue& cq) { void* tag; bool ok = false; while(cq.Next(&tag, &ok)) { CHECK(ok) << "not ok"; Call* call = static_cast<Call*>(tag); if(!call->RunNextState(ok)) { DLOG(INFO) << "Deleting Stream: " << tag; delete call; } } } int m_CurrentCQ; PrepareFunc m_PrepareFunc; std::shared_ptr<ThreadPool> m_ThreadPool; std::vector<std::unique_ptr<::grpc::CompletionQueue>> m_CQs; }; public: class Resources : public ::trtlab::Resources { public: Resources(uint32_t max_batch_size, uint64_t timeout, std::shared_ptr<Client> client) : m_MaxBatchsize(max_batch_size), m_Timeout(timeout), m_Client(client) { } virtual void PreprocessRequest(Request* req) {} void Push(Request* req, Response* resp, Callback callback) { // thread_local ProducerToken token(m_MessageQueue); // m_MessageQueue.enqueue(token, MessageType(req, resp, callback)); PreprocessRequest(req); m_MessageQueue.enqueue(MessageType{req, resp, callback}); } void ProgressEngine() { constexpr uint64_t quanta = 100; const double timeout = static_cast<double>(m_Timeout - quanta) / 1000000.0; size_t total_count; size_t max_batch; thread_local ConsumerToken token(m_MessageQueue); for(;;) { std::vector<MessageType> messages(m_MaxBatchsize); max_batch = m_MaxBatchsize;
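// Batching window (same pattern as the batcher in examples/03_Batching): each
// wait_dequeue_bulk_timed call below blocks for at most 'quanta' (100us), so
// elapsed() is re-checked frequently. The do/while exits immediately if the
// first pull returns nothing (the outer loop simply restarts); otherwise it
// keeps collecting until the batch is full or the timeout window expires,
// and whatever was collected is then forwarded.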
total_count = 0; auto start = std::chrono::steady_clock::now(); auto elapsed = [start]() -> double { return std::chrono::duration<double>(std::chrono::steady_clock::now() - start) .count(); }; do { auto count = m_MessageQueue.wait_dequeue_bulk_timed( token, &messages[total_count], max_batch, quanta); CHECK_LE(count, max_batch); total_count += count; max_batch -= count; } while(total_count && total_count < m_MaxBatchsize && elapsed() < timeout); if(total_count) { m_Client->WriteAndCloseStream(total_count, messages.data()); } } } private: size_t m_MaxBatchsize; uint64_t m_Timeout; std::shared_ptr<Client> m_Client; BlockingConcurrentQueue<MessageType> m_MessageQueue; }; class ReceiveContext final : public ::nvrpc::Context<Request, Response, Resources> { void ExecuteRPC(Request& request, Response& response) final override { DLOG(INFO) << "incoming unary request"; this->GetResources()->Push(&request, &response, [this](bool ok) { if(ok) this->FinishResponse(); else { LOG(INFO) << "shoot"; this->CancelResponse(); } }); } }; }; DEFINE_uint32(max_batch_size, 1, "Maximum batch size to collect and forward"); DEFINE_uint64(timeout_usecs, 200, "Batching window timeout in microseconds"); DEFINE_uint32(max_batches_in_flight, 300, "Maximum number of forwarded batches"); DEFINE_uint32(receiving_threads, 2, "Number of Receiving threads"); DEFINE_uint32(forwarding_threads, 2, "Number of Forwarding threads"); DEFINE_string(forwarding_target, "localhost:8001", "Batched Compute Service / Load-Balancer"); using InferMiddlemanService = MiddlemanService<easter::GRPCService, easter::InferRequest, easter::InferResponse>; using StatusMiddlemanService = MiddlemanService<easter::GRPCService, easter::StatusRequest, easter::StatusResponse>; class DemoMiddlemanService : public InferMiddlemanService { public: class Resources : public InferMiddlemanService::Resources { public: using InferMiddlemanService::Resources::Resources; void PreprocessRequest(easter::InferRequest* req) override { static auto local_data = std::make_unique<Allocator<Malloc>>(10 * 1024 * 1024); DLOG(INFO) << "Boom - preprocess request here!"; auto bytes = req->meta_data().batch_size() * req->meta_data().input(0).byte_size(); CHECK_EQ(0, req->raw_input_size()); req->add_raw_input(local_data->Data(), bytes); } }; }; int main(int argc, char* argv[]) { FLAGS_alsologtostderr = 1; // Log to console ::google::InitGoogleLogging("easterForwardingService"); ::google::ParseCommandLineFlags(&argc, &argv, true); grpc::ChannelArguments ch_args; ch_args.SetMaxReceiveMessageSize(-1); auto channel = grpc::CreateCustomChannel(FLAGS_forwarding_target, grpc::InsecureChannelCredentials(), ch_args); // GRPCService::Infer async forwarder auto forwarding_threads = std::make_shared<ThreadPool>(FLAGS_forwarding_threads); auto stub = ::easter::GRPCService::NewStub(channel); auto forwarding_prepare_func = [&stub](::grpc::ClientContext * context, const ::easter::InferRequest& request, ::grpc::CompletionQueue* cq) -> auto { return std::move(stub->PrepareAsyncInfer(context, request, cq)); }; auto client = std::make_shared<InferMiddlemanService::Client>(forwarding_prepare_func, forwarding_threads); // GRPCService::Status async forwarder auto status_forwarding_threads = std::make_shared<ThreadPool>(1); auto status_stub = ::easter::GRPCService::NewStub(channel); auto status_forwarding_prepare_func = [&status_stub](::grpc::ClientContext * context, const ::easter::StatusRequest& request, ::grpc::CompletionQueue* cq) -> auto { return std::move(status_stub->PrepareAsyncStatus(context, request, cq)); }; auto status_client = std::make_shared<StatusMiddlemanService::Client>( status_forwarding_prepare_func, status_forwarding_threads); auto rpcResources = std::make_shared<DemoMiddlemanService::Resources>( FLAGS_max_batch_size, FLAGS_timeout_usecs, client); auto statusResources = std::make_shared<StatusMiddlemanService::Resources>( FLAGS_max_batch_size, FLAGS_timeout_usecs,
status_client); Server server("0.0.0.0:50049"); auto bytes = trtlab::StringToBytes("100MiB"); server.Builder().SetMaxReceiveMessageSize(bytes); LOG(INFO) << "gRPC MaxReceiveMessageSize = " << trtlab::BytesToString(bytes); auto recvService = server.RegisterAsyncService<::easter::GRPCService>(); auto rpcCompute = recvService->RegisterRPC<DemoMiddlemanService::ReceiveContext>( &::easter::GRPCService::AsyncService::RequestInfer); auto rpcStatus = recvService->RegisterRPC<StatusMiddlemanService::ReceiveContext>( &::easter::GRPCService::AsyncService::RequestStatus); uint64_t context_count = FLAGS_max_batch_size * FLAGS_max_batches_in_flight; uint64_t contexts_per_executor_thread = std::max(context_count / FLAGS_receiving_threads, 1UL); auto executor = server.RegisterExecutor(new Executor(FLAGS_receiving_threads)); executor->RegisterContexts(rpcCompute, rpcResources, contexts_per_executor_thread); auto status_executor = server.RegisterExecutor(new Executor(1)); status_executor->RegisterContexts(rpcStatus, statusResources, 1); auto executor_threads = std::make_shared<ThreadPool>(2); executor_threads->enqueue([rpcResources] { rpcResources->ProgressEngine(); }); executor_threads->enqueue([statusResources] { statusResources->ProgressEngine(); }); LOG(INFO) << "Running Server"; server.Run(std::chrono::milliseconds(1), [] {}); }
================================================ FILE: examples/10_Internals/CMakeLists.txt ================================================
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of NVIDIA CORPORATION nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add_executable(internals.x internals.cc ) target_link_libraries(internals.x trtlab::core trtlab::cuda gflags )
================================================ FILE: examples/10_Internals/README.md ================================================
# YAIS Internals The `internals.x` program is designed to be run on a DGX-Station or DGX-1. This is mostly to highlight the use of the Affinity API. If you want to run on a different CPU architecture, you simply need to change the following lines to a range that works with your CPU.
``` // Socket 1 - non-hyperthreads on a DGX-1, or // Socket 0 - hyperthreads on a DGX-Station auto socket_1 = Affinity::GetAffinity().Intersection( Affinity::GetCpusFromString("20-39") // <== Change Me! ); ``` ## Primitive Classes * `Affinity` * Get and Set CPU affinities for the current thread * `ThreadPool` * Create a generic worker thread pool that accepts arbitrary lambda functions. * Pinned threads from `ThreadPool`s are used to allocate memory, to ensure that CPU allocations are allocated and first-touched on threads on the NUMA node where they will be used. This is important for keeping each NUMA node's threads and memory pools separate. * `Memory` * `Memory` and the derived classes (`MallocMemory`, `CudaPinnedHostMemory`, `CudaDeviceMemory`) are not used directly; however, they provide the implementation details used by the generic `Allocator`. * `Allocator` * Generic Templated Class used to create `std::shared_ptr`s and `std::unique_ptr`s to instances of `Allocator<MemoryType>`. * `MemoryStack` * Generic Templated Class to create a memory stack from a given `AllocatorType`. * You can only advance the stack pointer, or reset the entire stack. * TODO: Create sub-stacks from a given stack. * `Pool` * Generic Templated Class that holds objects of `ResourceType`. * Resources can be checked out of the Pool (`Pop`) as a *special type* of `std::shared_ptr`, which automatically returns the Resource object to the pool when the reference count of the `shared_ptr` goes to zero. This ensures Resources are not lost on exceptions, and also that the Pool cannot be deleted until all objects have been returned to the Pool. ## TensorRT Classes * `Model` * Wrapper around `nvinfer1::ICudaEngine` * `Buffers` * `MemoryStackWithTracking<CudaPinnedHostMemory>` and `MemoryStackWithTracking<CudaDeviceMemory>` used to manage Input/Output Tensor Bindings. * Owns a `cudaStream_t` to be used with Async Copies and Kernel Executions on the data held by the Buffers. * Convenience H2D and D2H copy functions * `ExecutionContext` - Wrapper around `nvinfer1::IExecutionContext` * `Enqueue` launches the inference calculation and adds a `cudaEvent_t` to the stream to be triggered when the inference calculation is finished and the `ExecutionContext` can be released. * `Resources` * Combines the above set of resources into a single `trtlab::Resources` class capable of being associated with a `nvrpc::Context`. ## Examples ### Affinity * [Definition: tensorrt/laboratory/core/affinity.h](../../yais/include/tensorrt/laboratory/core/affinity.h) * [Implementation: YAIS/Affinity.cc](../../yais/src/Affinity.cc) Here, we request all the logical CPUs from Socket 0 that are not hyperthreads; then we get either all the non-hyperthreads from socket 1 on a DGX-1, or the hyperthreads on socket 0 on a DGX-Station, using `GetCpusFromString`. ``` // Socket 0 - non-hyperthreads on a DGX-1 or Station auto socket_0 = Affinity::GetAffinity().Intersection( Affinity::GetCpusBySocket(0).Intersection( Affinity::GetCpusByProcessingUnit(0) )); // Socket 1 - non-hyperthreads on a DGX-1, or // Socket 0 - hyperthreads on a DGX-Station auto socket_1 = Affinity::GetAffinity().Intersection( Affinity::GetCpusFromString("20-39") ); LOG(INFO) << socket_0; ``` Single-line output reformatted to per-line-indented output for readability. ``` 0515 07:14:48.007148 10919 test_affinity.cc:61] [id: 0, numa: 0, socket: 0, core: 0, processing_unit: 0], [id: 1, numa: 0, socket: 0, core: 1, processing_unit: 0], [id: 2, numa: 0, socket: 0, core: 2, processing_unit: 0], ... omitted for brevity ...
[id: 18, numa: 0, socket: 0, core: 18, processing_unit: 0], [id: 19, numa: 0, socket: 0, core: 19, processing_unit: 0] ``` ### ThreadPool * [Definition: tensorrt/laboratory/core/thread_pool.h](../../yais/include/tensorrt/laboratory/core/thread_pool.h) * [Implementation: YAIS/ThreadPool.cc](../../yais/src/ThreadPool.cc) The ThreadPool class creates a pool of worker threads that pull work from a queue. The work queue can be any set of captured lambda functions or function pointers passed to the `enqueue` function. ``` // Create a ThreadPool where each thread is pinned to one logical CPU in the CpuSet auto workers_0 = std::make_shared<ThreadPool>(socket_0); auto workers_1 = std::make_shared<ThreadPool>(socket_1); // Create a massive set of threads that can run anywhere our current process is allowed to run auto bftp = std::make_unique<ThreadPool>(128, Affinity::GetAffinity()); // Shutdown the BFTP bftp.reset(); // Enqueue some basic logging for(int i=0; i<10; i++) { auto result = workers_0->enqueue([i]{ LOG(INFO) << i << " " << Affinity::GetAffinity(); std::this_thread::sleep_for(std::chrono::milliseconds(10)); }); } ``` As these ThreadPools are generic, we can enqueue any type of work to them. Many thanks to the original authors Jakob Progsch and Václav Zeman for this incredibly useful class. For details on the original work and the modifications made in this project, see [CREDITS.md](../../CREDITS.md) and the source code. ### Memory One of the reasons why `Affinity` and `ThreadPool` were introduced prior to `Memory` is that memory on NUMA systems can be difficult to handle correctly. For memory segments that will be primarily used by sets of threads, it is very important to first set the affinity of the threads, then allocate and touch each page in the memory allocation (first-touch) on the thread that will primarily use the segment. NERSC has a nice [write-up on memory affinity and first touch policies](http://www.nersc.gov/users/computational-systems/cori/application-porting-and-performance/improving-openmp-scaling/). In this section, we'll show how to properly use the `Memory` and `Allocator` classes in a NUMA-friendly way using `ThreadPool`s. * [Definition: tensorrt/laboratory/core/memory.h](../../yais/include/tensorrt/laboratory/core/memory.h) The `Memory` class and its derived classes, see below, are the core memory classes in YAIS; however, these classes are not directly used. Instead, they provide the implementation details on how memory of their respective classes is to be allocated, freed, and page-aligned. For details, see the comments in the source code. Derived `Memory` Classes: * `Malloc` * `CudaPinnedHostMemory` * `CudaDeviceMemory` * `CudaManagedMemory` ### Allocator * [Definition: tensorrt/laboratory/core/memory.h](../../yais/include/tensorrt/laboratory/core/memory.h) The templated `Allocator` class performs memory allocation and freeing operations. This class does not have a public constructor; instead, you are required to use either the `make_shared` or `make_unique` static methods. In doing so, the method to free the allocation is captured by the destructor, which is triggered by the default deleter of the `shared_ptr` or `unique_ptr`. An allocated memory segment is of type `Allocator<MemoryType>`, which inherits from `MemoryType`. The base `Memory` class provides three functions: `GetPointer()`, `GetSize()`, and `WriteZeros()`.
``` std::shared_ptr<CudaPinnedHostMemory> pinned_0, pinned_1; auto future_0 = workers_0->enqueue([&pinned_0]{ pinned_0 = Allocator<CudaPinnedHostMemory>::make_shared(1024*1024*1024); pinned_0->WriteZeros(); }); auto future_1 = workers_1->enqueue([&pinned_1]{ pinned_1 = Allocator<CudaPinnedHostMemory>::make_shared(1024*1024*1024); pinned_1->WriteZeros(); }); future_0.get(); CHECK(pinned_0) << "pinned_0 got deallocated - fail"; LOG(INFO) << "pinned_0 (ptr, size): (" << pinned_0->GetPointer() << ", " << pinned_0->GetSize() << ")"; ``` ``` I0515 08:36:56.619297 13260 test_affinity.cc:59] pinned_0 (ptr, size): (0x1005e000000, 1073741824) ``` ### MemoryStack * [Definition: tensorrt/laboratory/core/memory_stack.h](../../yais/include/tensorrt/laboratory/core/memory_stack.h) Generic `MemoryStack` that takes an `AllocatorType`. The memory stack advances the stack pointer via `Allocate` and resets the stack pointer via `ResetAllocations`. `MemoryStackWithTracking` is a specialized derivation that records the pointer and size of each call to `Allocate`. `MemoryStackWithTracking` is used in the provided TensorRT classes as a means to push the input/output tensor bindings onto the stack. ``` std::shared_ptr<MemoryStackWithTracking<CudaDeviceMemory>> gpu_stack_on_socket0; future_0 = workers_0->enqueue([&gpu_stack_on_socket0]{ CHECK_EQ(cudaSetDevice(0), CUDA_SUCCESS) << "Set Device 0 failed"; gpu_stack_on_socket0 = std::make_shared< MemoryStackWithTracking<CudaDeviceMemory>>(1024*1024*1024); }); future_0.get(); // thread allocating gpu_stack_on_socket0 finished with task LOG(INFO) << "Push Binding 0 - 10MB - stack_ptr = " << gpu_stack_on_socket0->Allocate(10*1024*1024); LOG(INFO) << "Push Binding 1 - 128MB - stack_ptr = " << gpu_stack_on_socket0->Allocate(128*1024*1024); gpu_stack_on_socket0->ResetAllocations(); ``` ``` I0515 09:46:55.159700 14176 test_affinity.cc:78] Push Binding 0 - 10MB - stack_ptr = 0x1009e000000 I0515 09:46:55.159710 14176 test_affinity.cc:80] Push Binding 1 - 128MB - stack_ptr = 0x1009ea00000 ``` ### Pool * [Definition: tensorrt/laboratory/core/pool.h](../../yais/include/tensorrt/laboratory/core/pool.h) A `Pool` is a generic `Queue<std::shared_ptr<ResourceType>>` with a special `Pop` method. The class inherits from `std::enable_shared_from_this`, meaning it must be constructed using the factory method, which ensures the object is owned by a `std::shared_ptr`. The `Pop` method of `Pool` is probably the coolest and most contentious component of this library. `Pop` pulls a resource (`from_pool`) off the queue; however, it does not return this resource directly. Instead, a *new type* of `std::shared_ptr` is created using the raw pointer from `from_pool`. The reason this is a *new type* of `shared_ptr` is that we provide a custom `Deleter` which captures by value (incrementing the reference count) both `from_pool` and a `shared_ptr` to the pool itself. The custom `Deleter` does not free the resource when its reference count goes to zero; rather, it returns the original `from_pool` `shared_ptr` to the pool. By capturing a `shared_ptr` to the pool in the `Deleter`, we ensure that the pool cannot be freed while resources are checked out. This also ensures that the `shared_ptr` returned from `Pop` is exception safe; meaning, the resource will be returned to the pool if an exception is thrown and caught - it won't leak resources. Alternatively, `Pop` can be called with an `onReturn` lambda function, which will be executed just prior to the original object being returned to the Pool. If the `ResourceType` is stateful, this is a good chance to clear the state and prepare it for the next use.
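For illustration, here is a minimal sketch of the `onReturn` form, applied to the `buffers` pool constructed in the example below (the exact `Pop` signature here is an assumption, not taken from [pool.h](../../yais/include/tensorrt/laboratory/core/pool.h)):

```
// Hypothetical: clear per-use state just before the Buffer re-enters the pool
auto buffer = buffers->Pop([](Buffer* buffer) {
    buffer->gpu_stack->ResetAllocations(); // uses the MemoryStackWithTracking API above
});
```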
``` struct Buffer { Buffer( std::shared_ptr<CudaPinnedHostMemory> pinned_, std::shared_ptr<MemoryStackWithTracking<CudaDeviceMemory>> gpu_stack_, std::shared_ptr<ThreadPool> workers_ ) : pinned(pinned_), gpu_stack(gpu_stack_), workers(workers_) {} // a real example probably includes a deviceID and a stream as part of the buffer std::shared_ptr<CudaPinnedHostMemory> pinned; std::shared_ptr<MemoryStackWithTracking<CudaDeviceMemory>> gpu_stack; std::shared_ptr<ThreadPool> workers; }; auto buffers = Pool<Buffer>::Create(); buffers->EmplacePush(new Buffer(pinned_0, gpu_stack_on_socket0, workers_0)); buffers->EmplacePush(new Buffer(pinned_1, gpu_stack_on_socket1, workers_1)); for(int i=0; i<6; i++) { auto buffer = buffers->Pop(); buffer->workers->enqueue([buffer]{ // perform some work - regardless of which buffer you got, you are working // on a thread properly associated with the resources // note: buffer is captured by value, incrementing its reference count, // meaning you have access to it here and when it goes out of scope, it will // be returned to the Pool. LOG(INFO) << Affinity::GetAffinity(); }); } ``` ## TensorRT Examples * [Definition: YAIS/YAIS/TensorRT/TensorRT.h](../../yais/include/YAIS/TensorRT/TensorRT.h) * [Implementation: YAIS/TensorRT.cc](../../yais/src/TensorRT.cc) TensorRT classes build on the primitives above. For now, see the comments in the header file, as the header file is pretty well documented.
================================================ FILE: examples/10_Internals/internals.cc ================================================
/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ #include "tensorrt/laboratory/core/affinity.h" #include "tensorrt/laboratory/core/memory/allocator.h" #include "tensorrt/laboratory/core/memory/memory_stack.h" #include "tensorrt/laboratory/core/pool.h" #include "tensorrt/laboratory/core/thread_pool.h" #include "tensorrt/laboratory/cuda/device_info.h" #include "tensorrt/laboratory/cuda/memory/cuda_device.h" #include "tensorrt/laboratory/cuda/memory/cuda_managed.h" #include "tensorrt/laboratory/cuda/memory/cuda_pinned_host.h" #include #include #include #include #include #include #include #include using trtlab::Affinity; using trtlab::Allocator; using trtlab::CpuSet; using trtlab::CudaDeviceMemory; using trtlab::CudaPinnedHostMemory; using trtlab::DeviceInfo; using trtlab::MemoryStack; using trtlab::Pool; using trtlab::ThreadPool; int main(int argc, char* argv[]) { FLAGS_alsologtostderr = 1; // Log to console ::google::InitGoogleLogging("example10/internals.x"); auto one_gib = 1024 * 1024 * 1024; auto zeroMemory = true; const auto& gpu_0 = DeviceInfo::Affinity(0); // Socket 0 - non-hyperthreads on a DGX-1 or Station const auto& socket_0 = Affinity::GetAffinity().Intersection( Affinity::GetCpusBySocket(0).Intersection(Affinity::GetCpusByProcessingUnit(0))); // Socket 1 - non-hyperthreads on a DGX-1, or // Socket 0 - hyperthreads on a DGX-Station const auto& socket_1 = Affinity::GetAffinity().Intersection(Affinity::GetCpusFromString("20-39")); auto workers_0 = std::make_shared(socket_0); auto workers_1 = std::make_shared(socket_1); std::shared_ptr pinned_0, pinned_1; auto future_0 = workers_0->enqueue([=, &pinned_0] { pinned_0 = std::make_shared>(one_gib); pinned_0->Fill(0); }); auto future_1 = workers_1->enqueue([=, &pinned_1] { pinned_1 = std::make_shared>(one_gib); pinned_1->Fill(0); }); LOG(INFO) << socket_0; future_0.get(); CHECK(pinned_0) << "pinned_0 got deAllocator - fail"; LOG(INFO) << "pinned_0 (ptr, size): (" << pinned_0->Data() << ", " << pinned_0->Size() << ")"; future_1.get(); std::shared_ptr> gpu_stack_on_socket0; std::shared_ptr> gpu_stack_on_socket1; // It's not strictly necessary to alloaction GPU memory from threads near the GPU // this just drives home the point that we want to align CPU worker thread to GPU affinity. future_0 = workers_0->enqueue([=, &gpu_stack_on_socket0] { CHECK_EQ(cudaSetDevice(0), CUDA_SUCCESS) << "Set Device 0 failed"; gpu_stack_on_socket0 = std::make_shared>(one_gib); gpu_stack_on_socket0->Reset(zeroMemory); }); // On a dual-socket system, we could use workers_1 to allocation device memory. // Leaving this as an exercise to the reader. future_0.get(); // thread allocating gpu_stack_on_socket0 finished with task LOG(INFO) << "Push Binding 0 - 10MB - stack_ptr = " << gpu_stack_on_socket0->Allocate(10 * 1024 * 1024); LOG(INFO) << "Push Binding 1 - 128MB - stack_ptr = " << gpu_stack_on_socket0->Allocate(128 * 1024 * 1024); // Try allocating 1 byte. Notice how the memory is aligned. Default alignment // is defined by the MemoryType in Memory.h gpu_stack_on_socket0->Reset(); /** * Create a Buffer object associates a worker threads, host memory and device memory * that are properly aligned to the hardware topology. */ struct Buffer { Buffer(std::shared_ptr pinned_, std::shared_ptr> gpu_stack_, std::shared_ptr workers_) : pinned(pinned_), gpu_stack(gpu_stack_), workers(workers_) { } std::shared_ptr pinned; std::shared_ptr> gpu_stack; std::shared_ptr workers; // Normally, we'd associate some GPU index value to the buffer. 
}; // Now create a Pool of Buffers auto buffers = Pool<Buffer>::Create(); // Here we push two buffers, one for each socket. buffers->EmplacePush(new Buffer(pinned_0, gpu_stack_on_socket0, workers_0)); buffers->EmplacePush(new Buffer(pinned_1, gpu_stack_on_socket1, workers_1)); // Exercise: add more buffer objects. Which of the three objects per Buffer // will you reuse, which will you make new instances of? // If you have arbitrary work which is not necessarily topology-aligned, say an incoming // inference request, you can pull a buffer object from the pool and queue work to the // proper set of threads best associated with that device for(int i = 0; i < 6; i++) { auto buffer = buffers->Pop(); buffer->workers->enqueue([buffer] { // perform some work - regardless of which buffer you got, you are working // on a thread properly associated with the resources LOG(INFO) << Affinity::GetAffinity(); std::this_thread::sleep_for(std::chrono::milliseconds(1)); }); } workers_0.reset(); workers_1.reset(); return 0; }
================================================ FILE: examples/11_Protos/CMakeLists.txt ================================================
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of NVIDIA CORPORATION nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. set(protobuf_MODULE_COMPATIBLE TRUE) find_package(Protobuf CONFIG REQUIRED) message(STATUS "Using protobuf ${protobuf_VERSION}") set(_PROTOBUF_LIBPROTOBUF protobuf::libprotobuf) set(_PROTOBUF_PROTOC $<TARGET_FILE:protobuf::protoc>) add_subdirectory(echo) add_subdirectory(demo) add_subdirectory(inference)
================================================ FILE: examples/11_Protos/demo/CMakeLists.txt ================================================
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of NVIDIA CORPORATION nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. INCLUDE(GRPCGenerateCPP) PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS dataset.proto inference.proto ) PROTOBUF_GENERATE_GRPC_CPP(PROTO_GRPC_SRCS PROTO_GRPC_HDRS dataset.proto inference.proto ) add_library(demo-protos ${PROTO_SRCS} ${PROTO_GRPC_SRCS} ) target_link_libraries(demo-protos PUBLIC ${_PROTOBUF_LIBPROTOBUF} ) target_include_directories(demo-protos PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ) ================================================ FILE: examples/11_Protos/demo/dataset.proto ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ syntax = "proto3"; service SharedMemoryDataSet { rpc GetInfo (InfoRequest) returns (Info) {} } message Image { fixed64 sysv_offset = 1; uint32 label_index = 2; repeated int32 shape = 3; uint64 size = 4; string filename = 5; } message Info { uint32 handle = 1; uint64 sysv_key = 2; repeated Image images = 3; repeated string labels = 4; } message InfoRequest { uint32 image_size = 1; } ================================================ FILE: examples/11_Protos/demo/inference.proto ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ syntax = "proto3"; package ssd; service Inference { rpc Compute (BatchInput) returns (BatchPredictions) {} } message BatchInput { uint32 engine_id = 1; uint64 batch_id = 2; uint32 batch_size = 3; uint32 int_offset = 4; uint64 sysv_offset = 5; bytes data = 6; } message BatchPredictions { repeated Element elements = 1; uint64 batch_id = 2; float compute_time = 3; float total_time = 4; repeated Timer timers = 5; } message Element { repeated Prediction predictions = 2; } message Bbox { float x = 1; // upper left float y = 2; // upper left float width = 3; float height = 4; uint32 class_id = 5; } message Prediction { uint32 class_id = 1; string class_str = 2; float score = 3; Bbox bbox = 4; } message Timer { string name = 1; float time = 2; enum TimerUnit { SECONDS = 0; MILLI = 1; MICRO = 2; NANO = 3; } TimerUnit unit = 3; } ================================================ FILE: examples/11_Protos/echo/CMakeLists.txt ================================================ # Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. 
# * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of NVIDIA CORPORATION nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. INCLUDE(GRPCGenerateCPP) PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS echo.proto ) PROTOBUF_GENERATE_GRPC_CPP(PROTO_GRPC_SRCS PROTO_GRPC_HDRS echo.proto ) add_library(echo-protos ${PROTO_SRCS} ${PROTO_GRPC_SRCS} ) target_link_libraries(echo-protos PUBLIC ${_PROTOBUF_LIBPROTOBUF} ) target_include_directories(echo-protos PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ) ================================================ FILE: examples/11_Protos/echo/echo.proto ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ syntax = "proto3"; package simple; service Inference { rpc Compute (Input) returns (Output) {} rpc Bidirectional (stream Input) returns (stream Output) {} rpc BatchedCompute (stream Input) returns (stream Output) {} } message SystemV { uint64 shm_id = 1; uint64 offset = 2; uint64 size = 3; } message Input { uint64 batch_id = 1; oneof data { bytes raw_bytes = 2; SystemV sysv = 3; } } message Output { uint64 batch_id = 1; } ================================================ FILE: examples/11_Protos/inference/CMakeLists.txt ================================================ # Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of NVIDIA CORPORATION nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. INCLUDE(GRPCGenerateCPP) PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS nvidia_inference.proto api.proto model_config.proto request_status.proto server_status.proto ) PROTOBUF_GENERATE_GRPC_CPP(PROTO_GRPC_SRCS PROTO_GRPC_HDRS nvidia_inference.proto ) add_library(nv-inference-protos ${PROTO_SRCS} ${PROTO_GRPC_SRCS} ) target_link_libraries(nv-inference-protos PUBLIC ${_PROTOBUF_LIBPROTOBUF} ) target_include_directories(nv-inference-protos PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ) ================================================ FILE: examples/11_Protos/inference/api.proto ================================================ // Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // * Neither the name of NVIDIA CORPORATION nor the names of its // contributors may be used to endorse or promote products derived // from this software without specific prior written permission. 
================================================
FILE: examples/11_Protos/inference/CMakeLists.txt
================================================
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

INCLUDE(GRPCGenerateCPP)

PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS
    nvidia_inference.proto
    api.proto
    model_config.proto
    request_status.proto
    server_status.proto
)

PROTOBUF_GENERATE_GRPC_CPP(PROTO_GRPC_SRCS PROTO_GRPC_HDRS
    nvidia_inference.proto
)

add_library(nv-inference-protos
  ${PROTO_SRCS}
  ${PROTO_GRPC_SRCS}
)

target_link_libraries(nv-inference-protos PUBLIC
  ${_PROTOBUF_LIBPROTOBUF}
)

target_include_directories(nv-inference-protos PUBLIC
  ${CMAKE_CURRENT_BINARY_DIR}
)

================================================
FILE: examples/11_Protos/inference/api.proto
================================================
// Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

syntax = "proto3";

package nvidia.inferenceserver;

// Request header for inferencing. The actual input data is delivered
// separate from the header.
message InferRequestHeader {
  // Input...
  message Input {
    // Name of the input.
    string name = 1;

    // Size of the input, in bytes. This is the size for one instance
    // of the input, not the entire size of a batch of the input.
    uint64 byte_size = 2;
  }

  // Output...
  message Output {
    // Name of the output.
    string name = 1;

    // Size of the output, in bytes. This is the size for one instance
    // of the output, not the entire size of a batch of the output.
    uint64 byte_size = 2;

    // Class result format. The output must be a vector. Output values
    // will be interpreted as probabilities and the highest 'count'
    // values will be returned.
    message Class {
      // Return the 'count' highest valued results.
      uint32 count = 1;
    }

    // Optional. If defined return this result as a classification
    // instead of raw data.
    Class cls = 3;
  }

  // Batch size of the inference inputs.
  uint32 batch_size = 1;

  // Inference inputs.
  repeated Input input = 2;

  // Inference outputs that are being requested.
  repeated Output output = 3;
}

// Response header for inferencing. Any raw response data (i.e. tensor
// values) is delivered separately from the header.
message InferResponseHeader {
  // Output...
  message Output {
    // Name of the output.
    string name = 1;

    // Raw result
    message Raw {
      // Size of the output, in bytes. This is the size for one
      // instance of the output, not the entire size of a batch of the
      // output.
      uint64 byte_size = 1;
    }

    // Classification result
    message Class {
      // The index in the output tensor.
      int32 idx = 1;

      // The value of the class as a float (typically a probability).
      float value = 2;

      // The label for the class (optional, only available if provided
      // by the model).
      string label = 3;
    }

    message Classes {
      // The topk classes for this output
      repeated Class cls = 1;
    }

    // Result format for this output. Only one of these may be
    // specified. For 'batch_classes' there should be one entry for
    // each output of the batch.
    Raw raw = 2;
    repeated Classes batch_classes = 3;
  }

  // Name of the model that produced the results.
  string model_name = 1;

  // Version of the model that produced the results.
  uint32 model_version = 2;

  // Batch size of the inference outputs.
  uint32 batch_size = 3;

  // The outputs
  repeated Output output = 4;
}

================================================
FILE: examples/11_Protos/inference/model_config.proto
================================================
// Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2018-2019, TensorFlow Authors. All rights reserved.

syntax = "proto3";

package nvidia.inferenceserver;

// Data types supported for input and output tensors.
enum DataType {
  TYPE_INVALID = 0;
  TYPE_BOOL = 1;
  TYPE_UINT8 = 2;
  TYPE_UINT16 = 3;
  TYPE_UINT32 = 4;
  TYPE_UINT64 = 5;
  TYPE_INT8 = 6;
  TYPE_INT16 = 7;
  TYPE_INT32 = 8;
  TYPE_INT64 = 9;
  TYPE_FP16 = 10;
  TYPE_FP32 = 11;
  TYPE_FP64 = 12;
}

// A group of one or more instances of a model and resources made
// available for those instances.
message ModelInstanceGroup {
  // Kind of this instance group.
  enum Kind {
    // This instance group represents instances that can run on either
    // CPU or GPU. If all GPUs listed in 'gpus' are available then
    // instances will be created on GPU(s), otherwise instances will be
    // created on CPU.
    KIND_AUTO = 0;

    // This instance group represents instances that must run on the
    // GPU.
    KIND_GPU = 1;

    // This instance group represents instances that must run on the
    // CPU.
    KIND_CPU = 2;
  }

  // Optional name of this group of instances. If not specified the
  // name will be formed as <model name>_<group number>. The name of
  // individual instances will be further formed by a unique instance
  // number and GPU index:
  //   <group name>_<instance number>_gpu<gpu index>
  string name = 1;

  // The kind of this instance group. Default is KIND_AUTO. If
  // KIND_AUTO or KIND_GPU then both 'count' and 'gpu' are valid and
  // may be specified. If KIND_CPU only 'count' is valid and 'gpu'
  // cannot be specified.
  Kind kind = 4;

  // Number of instances in this group created for each GPU listed in
  // 'gpus'. Default is 1.
  int32 count = 2;

  // GPU(s) where instances should be available. For each GPU listed,
  // 'count' instances of the model will be available. Setting 'gpus'
  // to empty (or not specifying at all) is equivalent to listing all
  // system GPUs.
  repeated int32 gpus = 3;
}

// Input tensor for the model
message ModelInput {
  // Format for the input.
  enum Format {
    // The input has no specific format.
    FORMAT_NONE = 0;

    // Image formats.
    // Tensors with this format require 3 dimensions if
    // the model does not support batching (max_batch_size = 0) or 4
    // dimensions if the model does support batching (max_batch_size
    // >= 1). In either case the 'dims' below should only specify the
    // 3 non-batch dimensions (i.e. HWC or CHW).
    FORMAT_NHWC = 1;
    FORMAT_NCHW = 2;
  }

  string name = 1;
  DataType data_type = 2;
  Format format = 3;
  repeated int64 dims = 4;
}

// Output tensor for the model
message ModelOutput {
  string name = 1;
  DataType data_type = 2;
  repeated int64 dims = 3;

  // Label file for this output (optional).
  string label_filename = 4;
}

// Policy indicating which versions of a model should be made
// available by the inference server.
message ModelVersionPolicy {
  // Serve only the 'num_versions' highest-numbered versions. This is
  // the default policy and the default value of 'num_versions' is 1,
  // indicating that by default only the highest-number version of a
  // model will be served.
  message Latest {
    uint32 num_versions = 1;
  }

  // Serve all versions of the model.
  message All {
  }

  // Serve only a specific set of versions of the model.
  message Specific {
    repeated int64 versions = 1;
  }

  // Each model must implement only a single policy. The default
  // policy is 'Latest'.
  oneof policy_choice {
    Latest latest = 1;
    All all = 2;
    Specific specific = 3;
  }
}

// Model configuration.
message ModelConfig {
  // Name of the model.
  string name = 1;

  // Type of model (e.g. "tensorflow").
  string platform = 2;

  // Policy indicating which version(s) of the model will be served.
  ModelVersionPolicy version_policy = 3;

  // Maximum batch size allowed for inference. This can only decrease
  // what is allowed by the model itself. A value of 0 indicates that
  // batching is not-allowed/is-disabled (for some input formats this
  // has implications on the expected dimension of the inputs, see
  // Format above).
  int32 max_batch_size = 4;

  // Inputs and outputs to the model.
  repeated ModelInput input = 5;
  repeated ModelOutput output = 6;

  // Optional instances of this model. If not specified, one instance
  // of the model will be instantiated on each available GPU.
  repeated ModelInstanceGroup instance_group = 7;

  // Optional filename of the model file to use if a
  // compute-capability specific model is not specified in
  // 'cc_model_names'. If not specified the default is model.graphdef
  // for TF graphdef models and model.plan for TensorRT PLAN models.
  string default_model_filename = 8;

  // Optional map from CUDA compute capabilities to the filename of
  // the model that supports that compute capability. The filename
  // refers to a file within the model version directory.
  map<string, string> cc_model_filenames = 9;
}

// List of model configurations.
message ModelConfigList {
  repeated ModelConfig config = 1;
}

================================================
FILE: examples/11_Protos/inference/nvidia_inference.proto
================================================
// Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

syntax = "proto3";

package nvidia.inferenceserver;

import "api.proto";
import "request_status.proto";
import "server_status.proto";

service GRPCService {
  // Get server or model status
  rpc Status(StatusRequest) returns (StatusResponse) {}

  // Control server profiling
  rpc Profile(ProfileRequest) returns (ProfileResponse) {}

  // Health check
  rpc Health(HealthRequest) returns (HealthResponse) {}

  // Perform inference. [ Set the maximum message size (default 4 MB)
  // and transmit in one pass. TensorFlow seems to use this approach to
  // transfer tensors, which can be large:
  // https://github.com/grpc/grpc/issues/8975 ]
  rpc Infer(InferRequest) returns (InferResponse) {}
}

// Request message for server status.
message StatusRequest {
  // The specific model status to be returned. Return status for all
  // models if empty.
  string model_name = 1;
}

// Response message for server status.
message StatusResponse {
  RequestStatus request_status = 1;
  ServerStatus server_status = 2;
}

// Request message for profile.
message ProfileRequest {
  string cmd = 1;
}

// Response message for profile.
message ProfileResponse {
  RequestStatus request_status = 1;
}

// Request message for health.
message HealthRequest {
  string mode = 1;
}

// Response message for health.
message HealthResponse {
  RequestStatus request_status = 1;
  bool health = 2;
}

// Request message for inference.
message InferRequest {
  // Name of model to use for inference
  string model_name = 1;

  // Version of the model to use for inference. If not specified use
  // the latest/most-recent version of the model. [ Use string here so
  // default value of empty indicates not specified ].
  string version = 2;

  // Meta-data for the inference request.
  InferRequestHeader meta_data = 3;

  // Raw input tensor data in the order specified in 'meta_data'.
  repeated bytes raw_input = 4;

  uint64 batch_id = 100;
  uint32 batch_size = 101;
  uint64 sysv_offset = 102;
}

// Response message for inference.
message InferResponse {
  RequestStatus request_status = 1;

  // Meta-data for the inference response.
  InferResponseHeader meta_data = 2;

  // Raw output tensor data in the order specified in 'meta_data'.
  repeated bytes raw_output = 3;

  uint64 batch_id = 100;
  float compute_time = 101;
  float request_time = 102;
}

================================================
FILE: examples/11_Protos/inference/request_status.proto
================================================
// Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // * Neither the name of NVIDIA CORPORATION nor the names of its // contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. syntax = "proto3"; package nvidia.inferenceserver; // Status codes returned for inference server requests enum RequestStatusCode { INVALID = 0; SUCCESS = 1; UNKNOWN = 2; INTERNAL = 3; NOT_FOUND = 4; INVALID_ARG = 5; UNAVAILABLE = 6; UNSUPPORTED = 7; } // Status returned for all inference server requests message RequestStatus { // Required status code RequestStatusCode code = 1; // Optional message string msg = 2; // Inference server identifier. string server_id = 3; // Unique identifier for the request. Value 0 (zero) indicates // request ID is not known. uint64 request_id = 4; } ================================================ FILE: examples/11_Protos/inference/server_status.proto ================================================ // Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // * Neither the name of NVIDIA CORPORATION nor the names of its // contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

syntax = "proto3";

package nvidia.inferenceserver;

import "model_config.proto";

// Statistic collecting a duration metric
message StatDuration {
  // Cumulative number of times this metric occurred.
  uint64 count = 1;

  // Total collected duration of this metric in nanoseconds.
  uint64 total_time_ns = 2;
}

// Stats collected for Status requests.
message StatusRequestStats {
  // Total time required to service api/status requests, not including
  // HTTP or gRPC endpoint termination time.
  StatDuration success = 1;
}

// Stats collected for Profile requests.
message ProfileRequestStats {
  // Total time required to service profile requests, not including
  // HTTP or gRPC endpoint termination time.
  StatDuration success = 1;
}

// Stats collected for Health requests.
message HealthRequestStats {
  // Total time required to service health requests, not including
  // HTTP or gRPC endpoint termination time.
  StatDuration success = 1;
}

// Stats collected for Infer requests.
message InferRequestStats {
  // Total time required to service successful inference requests,
  // not including HTTP or gRPC endpoint termination time.
  StatDuration success = 1;

  // Total time required to service failed inference requests, not
  // including HTTP or gRPC endpoint termination time.
  StatDuration failed = 2;

  // Time required to run inferencing including time waiting for an
  // available model instance, time copying input tensors to GPU
  // memory, time executing the model, and time copying output tensors
  // from GPU memory. Wait time is also captured separately in 'run_wait',
  // so to get inferencing time not including wait time use 'run' -
  // 'run_wait'.
  StatDuration run = 3;

  // Time waiting for an available model instance.
  StatDuration run_wait = 4;
}

// Model readiness states.
enum ModelReadyState {
  MODEL_UNKNOWN = 0;
  MODEL_READY = 1;
  MODEL_UNAVAILABLE = 2;
  MODEL_LOADING = 3;
  MODEL_UNLOADING = 4;
}

// Status for a version of a model.
message ModelVersionStatus {
  // Current readiness state for the model version.
  ModelReadyState ready_state = 1;

  // Duration statistics for each batch size used for this version of
  // the model.
  map<uint32, InferRequestStats> infer_stats = 2;
}

// Status for a model.
message ModelStatus {
  // The configuration for the model.
  ModelConfig config = 1;

  // Duration statistics for each version of this model.
  map<uint32, ModelVersionStatus> version_status = 2;
}

// Server readiness states.
enum ServerReadyState {
  SERVER_INVALID = 0;
  SERVER_INITIALIZING = 1;
  SERVER_READY = 2;
  SERVER_EXITING = 3;
  SERVER_FAILED_TO_INITIALIZE = 10;
}

// Status for inference server
message ServerStatus {
  // Server ID.
  string id = 1;

  // Server version.
  string version = 2;

  // Current readiness state for the server.
  ServerReadyState ready_state = 7;

  // Server uptime in nanoseconds
  uint64 uptime_ns = 3;

  // Status for each model on the server as map from <model name> ->
  // ModelStatus.
  map<string, ModelStatus> model_status = 4;

  // Statistics for Status requests.
  StatusRequestStats status_stats = 5;

  // Statistics for Profile requests.
  ProfileRequestStats profile_stats = 6;

  // Statistics for Health requests.
  HealthRequestStats health_stats = 8;
}
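The `run`/`run_wait` split documented above implies a simple computation on the consumer side: mean latency is `total_time_ns / count`, and pure inference time is the mean of `run` minus the mean of `run_wait`. A minimal sketch of that arithmetic, assuming the protoc-generated header `server_status.pb.h` (the default protoc naming); `MeanMs` and `PrintInferStats` are illustrative helper names, not part of the repository:

```
#include <iostream>
#include "server_status.pb.h"

// Mean duration of a StatDuration in milliseconds (0 if never observed).
static double MeanMs(const nvidia::inferenceserver::StatDuration& stat)
{
    if(stat.count() == 0) return 0.0;
    return static_cast<double>(stat.total_time_ns()) / stat.count() / 1e6;
}

static void PrintInferStats(const nvidia::inferenceserver::InferRequestStats& stats)
{
    double run_ms = MeanMs(stats.run());
    double wait_ms = MeanMs(stats.run_wait());
    std::cout << "mean e2e success: " << MeanMs(stats.success()) << " ms\n"
              << "mean run:         " << run_ms << " ms\n"
              << "mean queue wait:  " << wait_ms << " ms\n"
              << "mean compute:     " << (run_ms - wait_ms) << " ms\n"; // 'run' - 'run_wait'
}

int main()
{
    // Fabricated counters purely to exercise the arithmetic.
    nvidia::inferenceserver::InferRequestStats stats;
    stats.mutable_run()->set_count(100);
    stats.mutable_run()->set_total_time_ns(500000000); // 5 ms mean
    stats.mutable_run_wait()->set_count(100);
    stats.mutable_run_wait()->set_total_time_ns(100000000); // 1 ms mean
    PrintInferStats(stats);
    return 0;
}
```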
================================================
FILE: examples/12_ConfigGenerator/CMakeLists.txt
================================================
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

pybind11_add_module(config_generator generator.cc)

target_link_libraries(config_generator PUBLIC
  yais
  nv-inference-protos
)

================================================
FILE: examples/12_ConfigGenerator/README.md
================================================
# TensorRT Inference Server Model Store Builder

- Ensure you built the project.
- Run `./link.sh` in this directory

## Design Requirements

This example consists of a ModelStore manager (Python) and a ModelConfigGenerator (C++ w/ Python bindings).

The ModelConfigGenerator shall:

- [X] parse serialized TensorRT engine files
- [X] translate the necessary properties of the ICudaEngine to an `::nvidia::inferenceserver::ModelConfig` protobuf message
- [ ] not require the presence of CUDA or a GPU to perform the actions

The ModelStore manager consists of a Python class for direct consumption and a command-line application that shall:

- [ ] create and manage a model-store in a user-supplied filesystem directory
- [X] add TensorRT model files to the model store using the ModelConfigGenerator and user-specified arguments
- [ ] add new versions of TensorRT models to a ModelStore
- [ ] remove versions of entire models from the ModelStore
- [ ] add, edit, update and remove TensorFlow models
- [ ] add, edit, update and remove PyTorch/Caffe2 models

## Prototype Implementation

```
./ms_mgmt --help
Usage: ms_mgmt [OPTIONS]

Options:
  --engine PATH          TensorRT serialized engine  [required]
  --concurrency INTEGER  max number of concurrent executions allowed
  --name TEXT            model name; default to basename(engine) with the ext
                         dropped
  --version INTEGER      model version
  --store-path TEXT      model store path; default to ./model-store
  --help                 Show this message and exit.
``` ``` ./ms_mgmt --store-path=/tmp/model-store --engine=/work/models/ResNet-50-b1-fp32.engine --name=overridden-model-name --version=1337 --concurrency=10 ls /tmp/model-store/ overridden-model-name ls /tmp/model-store/overridden-model-name/1337/ ResNet-50-b1-fp32.engine model.plan cat /tmp/model-store/overridden-model-name/config.pbtxt name: "overridden-model-name" platform: "tensorrt_plan" max_batch_size: 1 input { name: "data" data_type: TYPE_FP32 dims: 3 dims: 224 dims: 224 } output { name: "prob" data_type: TYPE_FP32 dims: 1000 dims: 1 dims: 1 } instance_group { count: 10 gpus: 0 } ``` ================================================ FILE: examples/12_ConfigGenerator/generator.cc ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */
#include <memory>
#include <string>

#include <glog/logging.h>
#include <pybind11/pybind11.h>

#include "YAIS/TensorRT/TensorRT.h"
#include "YAIS/YAIS.h"

// NVIDIA Inference Server Protos
#include "nvidia_inference.grpc.pb.h"
#include "nvidia_inference.pb.h"

using nvidia::inferenceserver::ModelConfig;
using trtlab::TensorRT::Runtime;

static size_t DataTypeToBytes(nvidia::inferenceserver::DataType dataType)
{
    switch(dataType)
    {
        case nvidia::inferenceserver::TYPE_INVALID:
            CHECK(false) << "Invalid DataType used";
            return 0;
        case nvidia::inferenceserver::TYPE_BOOL:
        case nvidia::inferenceserver::TYPE_UINT8:
        case nvidia::inferenceserver::TYPE_INT8:
            return 1;
        case nvidia::inferenceserver::TYPE_UINT16:
        case nvidia::inferenceserver::TYPE_INT16:
        case nvidia::inferenceserver::TYPE_FP16:
            return 2;
        case nvidia::inferenceserver::TYPE_UINT32:
        case nvidia::inferenceserver::TYPE_INT32:
        case nvidia::inferenceserver::TYPE_FP32:
            return 4;
        case nvidia::inferenceserver::TYPE_UINT64:
        case nvidia::inferenceserver::TYPE_INT64:
        case nvidia::inferenceserver::TYPE_FP64:
            return 8;
        default:
            CHECK(false) << "Invalid DataType used";
            return 0;
    }
}

static nvidia::inferenceserver::DataType ConvertTensorRTDataType(nvinfer1::DataType trt_datatype)
{
    switch(trt_datatype)
    {
        case nvinfer1::DataType::kFLOAT:
            return nvidia::inferenceserver::TYPE_FP32;
        case nvinfer1::DataType::kHALF:
            return nvidia::inferenceserver::TYPE_FP16;
        case nvinfer1::DataType::kINT8:
            return nvidia::inferenceserver::TYPE_INT8;
        case nvinfer1::DataType::kINT32:
            return nvidia::inferenceserver::TYPE_INT32;
        default:
            LOG(FATAL) << "Unknown TensorRT DataType";
    }
}

std::string tensorrt_engine(std::string model_name, std::string engine, int concurrency)
{
    ModelConfig config;
    auto model = trtlab::TensorRT::Runtime::DeserializeEngine(engine);

    config.set_name(model_name);
    config.set_platform("tensorrt_plan");
    config.set_max_batch_size(model->GetMaxBatchSize());

    for(auto i : model->GetInputBindingIds())
    {
        const auto& binding = model->GetBinding(i);
        auto input = config.add_input();
        input->set_name(binding.name);
        input->set_data_type(ConvertTensorRTDataType(binding.dtype));
        for(auto d : binding.dims)
        {
            input->add_dims(d);
        }
    }

    for(auto i : model->GetOutputBindingIds())
    {
        const auto& binding = model->GetBinding(i);
        auto output = config.add_output();
        output->set_name(binding.name);
        output->set_data_type(ConvertTensorRTDataType(binding.dtype));
        for(auto d : binding.dims)
        {
            output->add_dims(d);
        }
    }

    auto instance_group = config.add_instance_group();
    CHECK(concurrency > 0) << "Concurrency must be > 0";
    instance_group->set_count(concurrency);
    instance_group->add_gpus(0);

    return config.DebugString();
}

namespace py = pybind11;

PYBIND11_MODULE(config_generator, m)
{
    m.doc() = R"pbdoc(
        Pybind11 Yais plugin
        --------------------
        .. currentmodule:: config_generator
        .. autosummary::
           :toctree: _generate
           tensorrt_engine
    )pbdoc";
    m.def("tensorrt_engine", &tensorrt_engine, R"pbdoc(
        Generate a TensorRT Inference Server ModelConfig from a serialized engine file
    )pbdoc");

#ifdef VERSION_INFO
    m.attr("__version__") = VERSION_INFO;
#else
    m.attr("__version__") = "dev";
#endif
}

================================================
FILE: examples/12_ConfigGenerator/link.sh
================================================
#!/bin/bash
ln -s /work/build/examples/12_ConfigGenerator/config_generator.cpython-35m-x86_64-linux-gnu.so

================================================
FILE: examples/12_ConfigGenerator/ms_mgmt
================================================
#!/usr/bin/env python3
import os
import pathlib
import shutil
from contextlib import contextmanager

import click

import config_generator as cg

FileType = click.Path(exists=True, file_okay=True, dir_okay=False, resolve_path=True)
PathType = click.Path(exists=True, file_okay=False, dir_okay=True, resolve_path=True)


@contextmanager
def cd(newdir):
    prevdir = os.getcwd()
    os.chdir(os.path.expanduser(newdir))
    try:
        yield
    finally:
        os.chdir(prevdir)


# Path(exists=False, file_okay=True, dir_okay=True, writable=False, readable=True, resolve_path=False)
class ModelStore:
    def __init__(self, *, path, create=False, validate=False):
        if not os.path.isdir(path):
            pathlib.Path(path).mkdir(parents=create, exist_ok=create)
        self.path = os.path.abspath(path)
        self.name = os.path.basename(self.path)

    def model_name(self, *, engine, name=None):
        base = os.path.basename(engine)
        model = name or base.replace(".engine", "")
        return model

    def model_path(self, *, name):
        return os.path.join(self.path, name)

    def engine_path(self, *, name, version=0):
        return os.path.join(self.model_path(name=name), str(version))

    def create_engine_path(self, *, name, version):
        engine_path = self.engine_path(name=name, version=version)
        if os.path.exists(engine_path):
            raise RuntimeError("{} already exists in the model store".format(engine_path))
        pathlib.Path(engine_path).mkdir(parents=True, exist_ok=True)
        return engine_path

    def copy_and_link_engine(self, *, name, version, engine):
        engine_path = self.create_engine_path(name=name, version=version)
        shutil.copy(engine, engine_path)
        with cd(engine_path):
            os.symlink(os.path.basename(engine), "model.plan")

    def add_tensorrt_engine(self, *, engine, name=None, concurrency=1, version=0):
        engine = os.path.abspath(engine)
        if not os.path.isfile(engine):
            raise RuntimeError("{} engine does not exist".format(engine))
        name = self.model_name(engine=engine, name=name)
        model_path = self.model_path(name=name)
        self.copy_and_link_engine(name=name, version=version, engine=engine)
        config = cg.tensorrt_engine(name, engine, concurrency)
        with cd(model_path), open("config.pbtxt", "w") as file:
            file.write(config)


@click.command()
@click.option("--engine", type=FileType, required=True, help="TensorRT serialized engine")
@click.option("--concurrency", type=int, default=1, help="max number of concurrent executions allowed")
@click.option("--name", default=None, help="model name; default to basename(engine) with the ext dropped")
@click.option("--version", type=int, default=0, help="model version")
@click.option("--store-path", default=None, help="model store path; default to ./model-store")
def main(engine, concurrency, name, store_path, version):
    store_path = store_path or "model-store"
    store = ModelStore(path=store_path, create=True)
    base = os.path.basename(engine)
    name = name or base.replace(".engine", "")
    store.add_tensorrt_engine(engine=engine,
name=name, concurrency=concurrency, version=version) if __name__ == "__main__": main() ================================================ FILE: examples/12_FlatBuffers/CMakeLists.txt ================================================ # Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of NVIDIA CORPORATION nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. find_package(Flatbuffers) # This module defines # FLATBUFFERS_INCLUDE_DIR, directory containing headers # FLATBUFFERS_LIBS, directory containing flatbuffers libraries # FLATBUFFERS_STATIC_LIB, path to libflatbuffers.a # FLATBUFFERS_FOUND, whether flatbuffers has been found add_library(example-fbs example.grpc.fb.cc ) target_link_libraries(example-fbs PUBLIC flatbuffers ) target_include_directories(example-fbs PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ) add_executable(fb-server.x server.cc) target_link_libraries(fb-server.x nvrpc example-fbs gflags ) add_executable(fb-client.x client.cc) target_link_libraries(fb-client.x nvrpc example-fbs gflags ) ================================================ FILE: examples/12_FlatBuffers/client.cc ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <chrono>
#include <iostream>
#include <memory>
#include <string>

#include <flatbuffers/grpc.h>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <grpcpp/grpcpp.h>

#include "example.grpc.fb.h"
#include "example_generated.h"

using Input = flatbuffers::grpc::Message<HelloRequest>;
using Output = flatbuffers::grpc::Message<HelloReply>;

using grpc::Channel;
using grpc::ClientContext;
using grpc::Status;

class SimpleClient
{
  public:
    SimpleClient(std::shared_ptr<Channel> channel) : stub_(Greeter::NewStub(channel)) {}

    // Assembles the client's payload, sends it and presents the response back
    // from the server.
    std::string Compute(const int batch_id)
    {
        flatbuffers::grpc::MessageBuilder mb;

        // Data we are sending to the server.
        auto name_offset = mb.CreateString(std::to_string(batch_id));
        auto request_offset = CreateHelloRequest(mb, name_offset);
        mb.Finish(request_offset);
        auto request = mb.ReleaseMessage<HelloRequest>();

        // Container for the data we expect from the server.
        Output reply;

        // Context for the client. It could be used to convey extra information to
        // the server and/or tweak certain RPC behaviors.
        ClientContext context;

        // The actual RPC.
        Status status = stub_->SayHello(&context, request, &reply);

        // Act upon its status.
        if(status.ok())
        {
            const HelloReply* output = reply.GetRoot();
            return output->message()->str();
        }
        else
        {
            std::cout << status.error_code() << ": " << status.error_message() << std::endl;
            return "Fail!";
        }
    }

  private:
    std::unique_ptr<Greeter::Stub> stub_;
};

DEFINE_int32(count, 100, "number of grpc messages to send");

int main(int argc, char** argv)
{
    // Instantiate the client. It requires a channel, out of which the actual RPCs
    // are created. This channel models a connection to an endpoint (in this case,
    // localhost at port 50051). We indicate that the channel isn't authenticated
    // (use of InsecureChannelCredentials()).
    FLAGS_alsologtostderr = 1; // It will dump to console
    ::google::ParseCommandLineFlags(&argc, &argv, true);

    SimpleClient client(grpc::CreateChannel("localhost:50051", grpc::InsecureChannelCredentials()));

    auto start = std::chrono::steady_clock::now();
    for(int i = 0; i < FLAGS_count; i++)
    {
        auto reply = client.Compute(i);
        LOG_FIRST_N(INFO, 20) << reply;
    }
    auto end = std::chrono::steady_clock::now();
    float elapsed = std::chrono::duration<float>(end - start).count();
    std::cout << FLAGS_count << " requests in " << elapsed << " seconds" << std::endl;

    return 0;
}

================================================
FILE: examples/12_FlatBuffers/example.fbs
================================================
table HelloReply {
  message:string;
}

table HelloRequest {
  name:string;
}

table ManyHellosRequest {
  name:string;
  num_greetings:int;
}

rpc_service Greeter {
  SayHello(HelloRequest):HelloReply;
  SayManyHellos(ManyHellosRequest):HelloReply (streaming: "server");
}
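The example's own server (server.cc, linked against nvrpc in the CMakeLists above) uses the async completion-queue path. As a counterpart to the client above, here is a minimal *synchronous* server sketch against the same generated Greeter API; type and function names follow example.grpc.fb.h, the port matches the client's default, and this is an illustration rather than the repository's server implementation:

```
#include <memory>
#include <string>

#include <grpcpp/grpcpp.h>

#include "example.grpc.fb.h"
#include "example_generated.h"

class GreeterServiceImpl final : public Greeter::Service
{
    ::grpc::Status SayHello(::grpc::ServerContext* context,
                            const flatbuffers::grpc::Message<HelloRequest>* request,
                            flatbuffers::grpc::Message<HelloReply>* response) override
    {
        // Read the request root directly from the flatbuffer - no unpacking step.
        const HelloRequest* req = request->GetRoot();
        std::string greeting = "Hello, " + req->name()->str();

        // Build the reply in a MessageBuilder, then release it into the response.
        flatbuffers::grpc::MessageBuilder mb;
        auto msg_offset = mb.CreateString(greeting);
        auto reply_offset = CreateHelloReply(mb, msg_offset);
        mb.Finish(reply_offset);
        *response = mb.ReleaseMessage<HelloReply>();
        return ::grpc::Status::OK;
    }
};

int main()
{
    GreeterServiceImpl service;
    ::grpc::ServerBuilder builder;
    builder.AddListeningPort("0.0.0.0:50051", ::grpc::InsecureServerCredentials());
    builder.RegisterService(&service);
    auto server = builder.BuildAndStart();
    server->Wait();
    return 0;
}
```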
================================================
FILE: examples/12_FlatBuffers/example.grpc.fb.cc
================================================
// Generated by the gRPC C++ plugin.
// If you make any local change, they will be lost.
// source: example

#include "example.grpc.fb.h"
#include "example_generated.h"

#include <grpcpp/impl/codegen/async_stream.h>
#include <grpcpp/impl/codegen/async_unary_call.h>
#include <grpcpp/impl/codegen/channel_interface.h>
#include <grpcpp/impl/codegen/client_unary_call.h>
#include <grpcpp/impl/codegen/method_handler_impl.h>
#include <grpcpp/impl/codegen/rpc_service_method.h>
#include <grpcpp/impl/codegen/service_type.h>
#include <grpcpp/impl/codegen/sync_stream.h>

static const char* Greeter_method_names[] = {
    "/Greeter/SayHello",
    "/Greeter/SayManyHellos",
};

std::unique_ptr<Greeter::Stub> Greeter::NewStub(
    const std::shared_ptr<::grpc::ChannelInterface>& channel, const ::grpc::StubOptions& options)
{
    std::unique_ptr<Greeter::Stub> stub(new Greeter::Stub(channel));
    return stub;
}

Greeter::Stub::Stub(const std::shared_ptr<::grpc::ChannelInterface>& channel)
    : channel_(channel),
      rpcmethod_SayHello_(Greeter_method_names[0], ::grpc::internal::RpcMethod::NORMAL_RPC, channel),
      rpcmethod_SayManyHellos_(Greeter_method_names[1], ::grpc::internal::RpcMethod::SERVER_STREAMING, channel)
{
}

::grpc::Status Greeter::Stub::SayHello(::grpc::ClientContext* context,
                                       const flatbuffers::grpc::Message<HelloRequest>& request,
                                       flatbuffers::grpc::Message<HelloReply>* response)
{
    return ::grpc::internal::BlockingUnaryCall(channel_.get(), rpcmethod_SayHello_, context,
                                               request, response);
}

::grpc::ClientAsyncResponseReader<flatbuffers::grpc::Message<HelloReply>>*
Greeter::Stub::AsyncSayHelloRaw(::grpc::ClientContext* context,
                                const flatbuffers::grpc::Message<HelloRequest>& request,
                                ::grpc::CompletionQueue* cq)
{
    return ::grpc::internal::ClientAsyncResponseReaderFactory<
        flatbuffers::grpc::Message<HelloReply>>::Create(channel_.get(), cq, rpcmethod_SayHello_,
                                                        context, request, true);
}

::grpc::ClientAsyncResponseReader<flatbuffers::grpc::Message<HelloReply>>*
Greeter::Stub::PrepareAsyncSayHelloRaw(::grpc::ClientContext* context,
                                       const flatbuffers::grpc::Message<HelloRequest>& request,
                                       ::grpc::CompletionQueue* cq)
{
    return ::grpc::internal::ClientAsyncResponseReaderFactory<
        flatbuffers::grpc::Message<HelloReply>>::Create(channel_.get(), cq, rpcmethod_SayHello_,
                                                        context, request, false);
}

::grpc::ClientReader<flatbuffers::grpc::Message<HelloReply>>*
Greeter::Stub::SayManyHellosRaw(::grpc::ClientContext* context,
                                const flatbuffers::grpc::Message<ManyHellosRequest>& request)
{
    return ::grpc::internal::ClientReaderFactory<flatbuffers::grpc::Message<HelloReply>>::Create(
        channel_.get(), rpcmethod_SayManyHellos_, context, request);
}

::grpc::ClientAsyncReader<flatbuffers::grpc::Message<HelloReply>>*
Greeter::Stub::AsyncSayManyHellosRaw(::grpc::ClientContext* context,
                                     const flatbuffers::grpc::Message<ManyHellosRequest>& request,
                                     ::grpc::CompletionQueue* cq, void* tag)
{
    return ::grpc::internal::ClientAsyncReaderFactory<
        flatbuffers::grpc::Message<HelloReply>>::Create(channel_.get(), cq, rpcmethod_SayManyHellos_,
                                                        context, request, true, tag);
}

::grpc::ClientAsyncReader<flatbuffers::grpc::Message<HelloReply>>*
Greeter::Stub::PrepareAsyncSayManyHellosRaw(::grpc::ClientContext* context,
                                            const flatbuffers::grpc::Message<ManyHellosRequest>& request,
                                            ::grpc::CompletionQueue* cq)
{
    return ::grpc::internal::ClientAsyncReaderFactory<
        flatbuffers::grpc::Message<HelloReply>>::Create(channel_.get(), cq, rpcmethod_SayManyHellos_,
                                                        context, request, false, nullptr);
}

Greeter::Service::Service()
{
    AddMethod(new ::grpc::internal::RpcServiceMethod(
        Greeter_method_names[0], ::grpc::internal::RpcMethod::NORMAL_RPC,
        new ::grpc::internal::RpcMethodHandler<Greeter::Service,
                                               flatbuffers::grpc::Message<HelloRequest>,
                                               flatbuffers::grpc::Message<HelloReply>>(
            std::mem_fn(&Greeter::Service::SayHello), this)));
    AddMethod(new ::grpc::internal::RpcServiceMethod(
        Greeter_method_names[1], ::grpc::internal::RpcMethod::SERVER_STREAMING,
        new ::grpc::internal::ServerStreamingHandler<Greeter::Service,
                                                     flatbuffers::grpc::Message<ManyHellosRequest>,
                                                     flatbuffers::grpc::Message<HelloReply>>(
            std::mem_fn(&Greeter::Service::SayManyHellos), this)));
}

Greeter::Service::~Service() {}

::grpc::Status Greeter::Service::SayHello(::grpc::ServerContext* context,
                                          const flatbuffers::grpc::Message<HelloRequest>* request,
                                          flatbuffers::grpc::Message<HelloReply>* response)
{
    (void)context;
    (void)request;
    (void)response;
    return ::grpc::Status(::grpc::StatusCode::UNIMPLEMENTED, "");
}

::grpc::Status Greeter::Service::SayManyHellos(
    ::grpc::ServerContext* context, const flatbuffers::grpc::Message<ManyHellosRequest>* request,
    ::grpc::ServerWriter<flatbuffers::grpc::Message<HelloReply>>* writer)
{
    (void)context;
    (void)request;
    (void)writer;
    return ::grpc::Status(::grpc::StatusCode::UNIMPLEMENTED, "");
}
================================================
FILE: examples/12_FlatBuffers/example.grpc.fb.h
================================================
// Generated by the gRPC C++ plugin.
// If you make any local change, they will be lost.
// source: example
#ifndef GRPC_example__INCLUDED
#define GRPC_example__INCLUDED

#include "example_generated.h"
#include "flatbuffers/grpc.h"

#include <grpcpp/impl/codegen/async_generic_service.h>
#include <grpcpp/impl/codegen/async_stream.h>
#include <grpcpp/impl/codegen/async_unary_call.h>
#include <grpcpp/impl/codegen/method_handler_impl.h>
#include <grpcpp/impl/codegen/rpc_method.h>
#include <grpcpp/impl/codegen/service_type.h>
#include <grpcpp/impl/codegen/status.h>
#include <grpcpp/impl/codegen/stub_options.h>
#include <grpcpp/impl/codegen/sync_stream.h>

namespace grpc {
class CompletionQueue;
class Channel;
class ServerCompletionQueue;
class ServerContext;
} // namespace grpc

class Greeter final
{
  public:
    static constexpr char const* service_full_name() { return "Greeter"; }

    class StubInterface
    {
      public:
        virtual ~StubInterface() {}
        virtual ::grpc::Status SayHello(::grpc::ClientContext* context,
                                        const flatbuffers::grpc::Message<HelloRequest>& request,
                                        flatbuffers::grpc::Message<HelloReply>* response) = 0;
        std::unique_ptr<
            ::grpc::ClientAsyncResponseReaderInterface<flatbuffers::grpc::Message<HelloReply>>>
        AsyncSayHello(::grpc::ClientContext* context,
                      const flatbuffers::grpc::Message<HelloRequest>& request,
                      ::grpc::CompletionQueue* cq)
        {
            return std::unique_ptr<::grpc::ClientAsyncResponseReaderInterface<
                flatbuffers::grpc::Message<HelloReply>>>(AsyncSayHelloRaw(context, request, cq));
        }
        std::unique_ptr<
            ::grpc::ClientAsyncResponseReaderInterface<flatbuffers::grpc::Message<HelloReply>>>
        PrepareAsyncSayHello(::grpc::ClientContext* context,
                             const flatbuffers::grpc::Message<HelloRequest>& request,
                             ::grpc::CompletionQueue* cq)
        {
            return std::unique_ptr<::grpc::ClientAsyncResponseReaderInterface<
                flatbuffers::grpc::Message<HelloReply>>>(
                PrepareAsyncSayHelloRaw(context, request, cq));
        }
        std::unique_ptr<::grpc::ClientReaderInterface<flatbuffers::grpc::Message<HelloReply>>>
        SayManyHellos(::grpc::ClientContext* context,
                      const flatbuffers::grpc::Message<ManyHellosRequest>& request)
        {
            return std::unique_ptr<
                ::grpc::ClientReaderInterface<flatbuffers::grpc::Message<HelloReply>>>(
                SayManyHellosRaw(context, request));
        }
        std::unique_ptr<::grpc::ClientAsyncReaderInterface<flatbuffers::grpc::Message<HelloReply>>>
        AsyncSayManyHellos(::grpc::ClientContext* context,
                           const flatbuffers::grpc::Message<ManyHellosRequest>& request,
                           ::grpc::CompletionQueue* cq, void* tag)
        {
            return std::unique_ptr<
                ::grpc::ClientAsyncReaderInterface<flatbuffers::grpc::Message<HelloReply>>>(
                AsyncSayManyHellosRaw(context, request, cq, tag));
        }
        std::unique_ptr<::grpc::ClientAsyncReaderInterface<flatbuffers::grpc::Message<HelloReply>>>
        PrepareAsyncSayManyHellos(::grpc::ClientContext* context,
                                  const flatbuffers::grpc::Message<ManyHellosRequest>& request,
                                  ::grpc::CompletionQueue* cq)
        {
            return std::unique_ptr<
                ::grpc::ClientAsyncReaderInterface<flatbuffers::grpc::Message<HelloReply>>>(
                PrepareAsyncSayManyHellosRaw(context, request, cq));
        }

      private:
        virtual ::grpc::ClientAsyncResponseReaderInterface<flatbuffers::grpc::Message<HelloReply>>*
        AsyncSayHelloRaw(::grpc::ClientContext* context,
                         const flatbuffers::grpc::Message<HelloRequest>& request,
                         ::grpc::CompletionQueue* cq) = 0;
        virtual ::grpc::ClientAsyncResponseReaderInterface<flatbuffers::grpc::Message<HelloReply>>*
        PrepareAsyncSayHelloRaw(::grpc::ClientContext* context,
                                const flatbuffers::grpc::Message<HelloRequest>& request,
                                ::grpc::CompletionQueue* cq) = 0;
        virtual ::grpc::ClientReaderInterface<flatbuffers::grpc::Message<HelloReply>>*
        SayManyHellosRaw(::grpc::ClientContext* context,
                         const flatbuffers::grpc::Message<ManyHellosRequest>& request) = 0;
        virtual ::grpc::ClientAsyncReaderInterface<flatbuffers::grpc::Message<HelloReply>>*
        AsyncSayManyHellosRaw(::grpc::ClientContext* context,
                              const flatbuffers::grpc::Message<ManyHellosRequest>& request,
                              ::grpc::CompletionQueue* cq, void* tag) = 0;
        virtual ::grpc::ClientAsyncReaderInterface<flatbuffers::grpc::Message<HelloReply>>*
        PrepareAsyncSayManyHellosRaw(::grpc::ClientContext* context,
                                     const flatbuffers::grpc::Message<ManyHellosRequest>& request,
                                     ::grpc::CompletionQueue* cq) = 0;
    };
  class Stub final : public StubInterface {
   public:
    Stub(const std::shared_ptr<::grpc::ChannelInterface>& channel);
    ::grpc::Status SayHello(::grpc::ClientContext* context, const flatbuffers::grpc::Message<HelloRequest>& request, flatbuffers::grpc::Message<HelloReply>* response) override;
    std::unique_ptr<::grpc::ClientAsyncResponseReader<flatbuffers::grpc::Message<HelloReply>>> AsyncSayHello(::grpc::ClientContext* context, const flatbuffers::grpc::Message<HelloRequest>& request, ::grpc::CompletionQueue* cq) {
      return std::unique_ptr<::grpc::ClientAsyncResponseReader<flatbuffers::grpc::Message<HelloReply>>>(AsyncSayHelloRaw(context, request, cq));
    }
    std::unique_ptr<::grpc::ClientAsyncResponseReader<flatbuffers::grpc::Message<HelloReply>>> PrepareAsyncSayHello(::grpc::ClientContext* context, const flatbuffers::grpc::Message<HelloRequest>& request, ::grpc::CompletionQueue* cq) {
      return std::unique_ptr<::grpc::ClientAsyncResponseReader<flatbuffers::grpc::Message<HelloReply>>>(PrepareAsyncSayHelloRaw(context, request, cq));
    }
    std::unique_ptr<::grpc::ClientReader<flatbuffers::grpc::Message<HelloReply>>> SayManyHellos(::grpc::ClientContext* context, const flatbuffers::grpc::Message<ManyHellosRequest>& request) {
      return std::unique_ptr<::grpc::ClientReader<flatbuffers::grpc::Message<HelloReply>>>(SayManyHellosRaw(context, request));
    }
    std::unique_ptr<::grpc::ClientAsyncReader<flatbuffers::grpc::Message<HelloReply>>> AsyncSayManyHellos(::grpc::ClientContext* context, const flatbuffers::grpc::Message<ManyHellosRequest>& request, ::grpc::CompletionQueue* cq, void* tag) {
      return std::unique_ptr<::grpc::ClientAsyncReader<flatbuffers::grpc::Message<HelloReply>>>(AsyncSayManyHellosRaw(context, request, cq, tag));
    }
    std::unique_ptr<::grpc::ClientAsyncReader<flatbuffers::grpc::Message<HelloReply>>> PrepareAsyncSayManyHellos(::grpc::ClientContext* context, const flatbuffers::grpc::Message<ManyHellosRequest>& request, ::grpc::CompletionQueue* cq) {
      return std::unique_ptr<::grpc::ClientAsyncReader<flatbuffers::grpc::Message<HelloReply>>>(PrepareAsyncSayManyHellosRaw(context, request, cq));
    }

   private:
    std::shared_ptr<::grpc::ChannelInterface> channel_;
    ::grpc::ClientAsyncResponseReader<flatbuffers::grpc::Message<HelloReply>>* AsyncSayHelloRaw(::grpc::ClientContext* context, const flatbuffers::grpc::Message<HelloRequest>& request, ::grpc::CompletionQueue* cq) override;
    ::grpc::ClientAsyncResponseReader<flatbuffers::grpc::Message<HelloReply>>* PrepareAsyncSayHelloRaw(::grpc::ClientContext* context, const flatbuffers::grpc::Message<HelloRequest>& request, ::grpc::CompletionQueue* cq) override;
    ::grpc::ClientReader<flatbuffers::grpc::Message<HelloReply>>* SayManyHellosRaw(::grpc::ClientContext* context, const flatbuffers::grpc::Message<ManyHellosRequest>& request) override;
    ::grpc::ClientAsyncReader<flatbuffers::grpc::Message<HelloReply>>* AsyncSayManyHellosRaw(::grpc::ClientContext* context, const flatbuffers::grpc::Message<ManyHellosRequest>& request, ::grpc::CompletionQueue* cq, void* tag) override;
    ::grpc::ClientAsyncReader<flatbuffers::grpc::Message<HelloReply>>* PrepareAsyncSayManyHellosRaw(::grpc::ClientContext* context, const flatbuffers::grpc::Message<ManyHellosRequest>& request, ::grpc::CompletionQueue* cq) override;
    const ::grpc::internal::RpcMethod rpcmethod_SayHello_;
    const ::grpc::internal::RpcMethod rpcmethod_SayManyHellos_;
  };
  static std::unique_ptr<Stub> NewStub(const std::shared_ptr<::grpc::ChannelInterface>& channel, const ::grpc::StubOptions& options = ::grpc::StubOptions());

  class Service : public ::grpc::Service {
   public:
    Service();
    virtual ~Service();
    virtual ::grpc::Status SayHello(::grpc::ServerContext* context, const flatbuffers::grpc::Message<HelloRequest>* request, flatbuffers::grpc::Message<HelloReply>* response);
    virtual ::grpc::Status SayManyHellos(::grpc::ServerContext* context, const flatbuffers::grpc::Message<ManyHellosRequest>* request, ::grpc::ServerWriter<flatbuffers::grpc::Message<HelloReply>>* writer);
  };
  template <class BaseClass>
  class WithAsyncMethod_SayHello : public BaseClass {
   private:
    void BaseClassMustBeDerivedFromService(const Service* service) {}

   public:
    WithAsyncMethod_SayHello() { ::grpc::Service::MarkMethodAsync(0); }
    ~WithAsyncMethod_SayHello() override { BaseClassMustBeDerivedFromService(this); }
    // disable synchronous version of this method
    ::grpc::Status SayHello(::grpc::ServerContext* context, const flatbuffers::grpc::Message<HelloRequest>* request, flatbuffers::grpc::Message<HelloReply>* response) final override {
      abort();
      return ::grpc::Status(::grpc::StatusCode::UNIMPLEMENTED, "");
    }
    void RequestSayHello(::grpc::ServerContext* context, flatbuffers::grpc::Message<HelloRequest>* request, ::grpc::ServerAsyncResponseWriter<flatbuffers::grpc::Message<HelloReply>>* response, ::grpc::CompletionQueue* new_call_cq, ::grpc::ServerCompletionQueue* notification_cq, void* tag) {
      ::grpc::Service::RequestAsyncUnary(0, context, request, response, new_call_cq, notification_cq, tag);
    }
  };
  template <class BaseClass>
  class WithAsyncMethod_SayManyHellos : public BaseClass {
   private:
    void BaseClassMustBeDerivedFromService(const Service* service) {}

   public:
    WithAsyncMethod_SayManyHellos() { ::grpc::Service::MarkMethodAsync(1); }
    ~WithAsyncMethod_SayManyHellos() override { BaseClassMustBeDerivedFromService(this); }
    // disable synchronous version of this method
    ::grpc::Status SayManyHellos(::grpc::ServerContext* context, const flatbuffers::grpc::Message<ManyHellosRequest>* request, ::grpc::ServerWriter<flatbuffers::grpc::Message<HelloReply>>* writer) final override {
      abort();
      return ::grpc::Status(::grpc::StatusCode::UNIMPLEMENTED, "");
    }
    void RequestSayManyHellos(::grpc::ServerContext* context, flatbuffers::grpc::Message<ManyHellosRequest>* request, ::grpc::ServerAsyncWriter<flatbuffers::grpc::Message<HelloReply>>* writer, ::grpc::CompletionQueue* new_call_cq, ::grpc::ServerCompletionQueue* notification_cq, void* tag) {
      ::grpc::Service::RequestAsyncServerStreaming(1, context, request, writer, new_call_cq, notification_cq, tag);
    }
  };
  typedef WithAsyncMethod_SayHello<WithAsyncMethod_SayManyHellos<Service>> AsyncService;
  template <class BaseClass>
  class WithGenericMethod_SayHello : public BaseClass {
   private:
    void BaseClassMustBeDerivedFromService(const Service* service) {}

   public:
    WithGenericMethod_SayHello() { ::grpc::Service::MarkMethodGeneric(0); }
    ~WithGenericMethod_SayHello() override { BaseClassMustBeDerivedFromService(this); }
    // disable synchronous version of this method
    ::grpc::Status SayHello(::grpc::ServerContext* context, const flatbuffers::grpc::Message<HelloRequest>* request, flatbuffers::grpc::Message<HelloReply>* response) final override {
      abort();
      return ::grpc::Status(::grpc::StatusCode::UNIMPLEMENTED, "");
    }
  };
  template <class BaseClass>
  class WithGenericMethod_SayManyHellos : public BaseClass {
   private:
    void BaseClassMustBeDerivedFromService(const Service* service) {}

   public:
    WithGenericMethod_SayManyHellos() { ::grpc::Service::MarkMethodGeneric(1); }
    ~WithGenericMethod_SayManyHellos() override { BaseClassMustBeDerivedFromService(this); }
    // disable synchronous version of this method
    ::grpc::Status SayManyHellos(::grpc::ServerContext* context, const flatbuffers::grpc::Message<ManyHellosRequest>* request, ::grpc::ServerWriter<flatbuffers::grpc::Message<HelloReply>>* writer) final override {
      abort();
      return ::grpc::Status(::grpc::StatusCode::UNIMPLEMENTED, "");
    }
  };
  template <class BaseClass>
  class WithStreamedUnaryMethod_SayHello : public BaseClass {
   private:
    void BaseClassMustBeDerivedFromService(const Service* service) {}

   public:
    WithStreamedUnaryMethod_SayHello() {
      ::grpc::Service::MarkMethodStreamed(
          0, new ::grpc::internal::StreamedUnaryHandler<
                 flatbuffers::grpc::Message<HelloRequest>,
                 flatbuffers::grpc::Message<HelloReply>>(
                 std::bind(&WithStreamedUnaryMethod_SayHello::StreamedSayHello,
                           this, std::placeholders::_1, std::placeholders::_2)));
    }
    ~WithStreamedUnaryMethod_SayHello() override { BaseClassMustBeDerivedFromService(this); }
    // disable regular version of this method
    ::grpc::Status SayHello(::grpc::ServerContext* context, const flatbuffers::grpc::Message<HelloRequest>* request, flatbuffers::grpc::Message<HelloReply>* response) final override {
      abort();
      return ::grpc::Status(::grpc::StatusCode::UNIMPLEMENTED, "");
    }
    // replace default version of method with streamed unary
    virtual ::grpc::Status StreamedSayHello(::grpc::ServerContext* context, ::grpc::ServerUnaryStreamer<flatbuffers::grpc::Message<HelloRequest>, flatbuffers::grpc::Message<HelloReply>>* server_unary_streamer) = 0;
  };
  typedef WithStreamedUnaryMethod_SayHello<Service> StreamedUnaryService;
  template <class BaseClass>
  class WithSplitStreamingMethod_SayManyHellos : public BaseClass {
   private:
    void BaseClassMustBeDerivedFromService(const Service* service) {}

   public:
    WithSplitStreamingMethod_SayManyHellos() {
      ::grpc::Service::MarkMethodStreamed(
          1, new ::grpc::internal::SplitServerStreamingHandler<
                 flatbuffers::grpc::Message<ManyHellosRequest>,
                 flatbuffers::grpc::Message<HelloReply>>(std::bind(
                 &WithSplitStreamingMethod_SayManyHellos::StreamedSayManyHellos,
                 this, std::placeholders::_1, std::placeholders::_2)));
    }
    ~WithSplitStreamingMethod_SayManyHellos() override { BaseClassMustBeDerivedFromService(this); }
    // disable regular version of this method
    ::grpc::Status SayManyHellos(::grpc::ServerContext* context, const flatbuffers::grpc::Message<ManyHellosRequest>* request, ::grpc::ServerWriter<flatbuffers::grpc::Message<HelloReply>>* writer) final override {
      abort();
      return ::grpc::Status(::grpc::StatusCode::UNIMPLEMENTED, "");
    }
    // replace default version of method with split streamed
    virtual ::grpc::Status StreamedSayManyHellos(::grpc::ServerContext* context, ::grpc::ServerSplitStreamer<flatbuffers::grpc::Message<ManyHellosRequest>, flatbuffers::grpc::Message<HelloReply>>* server_split_streamer) = 0;
  };
  typedef WithSplitStreamingMethod_SayManyHellos<Service> SplitStreamedService;
  typedef WithStreamedUnaryMethod_SayHello<WithSplitStreamingMethod_SayManyHellos<Service>> StreamedService;
};

#endif  // GRPC_example__INCLUDED

================================================
FILE: examples/12_FlatBuffers/example_generated.h
================================================
// automatically generated by the FlatBuffers compiler, do not modify

#ifndef FLATBUFFERS_GENERATED_EXAMPLE_H_
#define FLATBUFFERS_GENERATED_EXAMPLE_H_

#include "flatbuffers/flatbuffers.h"

struct HelloReply;
struct HelloRequest;
struct ManyHellosRequest;

struct HelloReply FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
  enum { VT_MESSAGE = 4 };
  const flatbuffers::String* message() const {
    return GetPointer<const flatbuffers::String*>(VT_MESSAGE);
  }
  bool Verify(flatbuffers::Verifier& verifier) const {
    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_MESSAGE) &&
           verifier.VerifyString(message()) && verifier.EndTable();
  }
};

struct HelloReplyBuilder {
  flatbuffers::FlatBufferBuilder& fbb_;
  flatbuffers::uoffset_t start_;
  void add_message(flatbuffers::Offset<flatbuffers::String> message) {
    fbb_.AddOffset(HelloReply::VT_MESSAGE, message);
  }
  explicit HelloReplyBuilder(flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) {
    start_ = fbb_.StartTable();
  }
  HelloReplyBuilder& operator=(const HelloReplyBuilder&);
  flatbuffers::Offset<HelloReply> Finish() {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<HelloReply>(end);
    return o;
  }
};

inline flatbuffers::Offset<HelloReply> CreateHelloReply(
    flatbuffers::FlatBufferBuilder& _fbb,
    flatbuffers::Offset<flatbuffers::String> message = 0) {
  HelloReplyBuilder builder_(_fbb);
  builder_.add_message(message);
  return builder_.Finish();
}

inline flatbuffers::Offset<HelloReply> CreateHelloReplyDirect(
    flatbuffers::FlatBufferBuilder& _fbb, const char* message = nullptr) {
  return CreateHelloReply(_fbb, message ? _fbb.CreateString(message) : 0);
}
struct HelloRequest FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
  enum { VT_NAME = 4 };
  const flatbuffers::String* name() const {
    return GetPointer<const flatbuffers::String*>(VT_NAME);
  }
  bool Verify(flatbuffers::Verifier& verifier) const {
    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NAME) &&
           verifier.VerifyString(name()) && verifier.EndTable();
  }
};

struct HelloRequestBuilder {
  flatbuffers::FlatBufferBuilder& fbb_;
  flatbuffers::uoffset_t start_;
  void add_name(flatbuffers::Offset<flatbuffers::String> name) {
    fbb_.AddOffset(HelloRequest::VT_NAME, name);
  }
  explicit HelloRequestBuilder(flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) {
    start_ = fbb_.StartTable();
  }
  HelloRequestBuilder& operator=(const HelloRequestBuilder&);
  flatbuffers::Offset<HelloRequest> Finish() {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<HelloRequest>(end);
    return o;
  }
};

inline flatbuffers::Offset<HelloRequest> CreateHelloRequest(
    flatbuffers::FlatBufferBuilder& _fbb,
    flatbuffers::Offset<flatbuffers::String> name = 0) {
  HelloRequestBuilder builder_(_fbb);
  builder_.add_name(name);
  return builder_.Finish();
}

inline flatbuffers::Offset<HelloRequest> CreateHelloRequestDirect(
    flatbuffers::FlatBufferBuilder& _fbb, const char* name = nullptr) {
  return CreateHelloRequest(_fbb, name ? _fbb.CreateString(name) : 0);
}

struct ManyHellosRequest FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
  enum { VT_NAME = 4, VT_NUM_GREETINGS = 6 };
  const flatbuffers::String* name() const {
    return GetPointer<const flatbuffers::String*>(VT_NAME);
  }
  int32_t num_greetings() const {
    return GetField<int32_t>(VT_NUM_GREETINGS, 0);
  }
  bool Verify(flatbuffers::Verifier& verifier) const {
    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NAME) &&
           verifier.VerifyString(name()) &&
           VerifyField<int32_t>(verifier, VT_NUM_GREETINGS) &&
           verifier.EndTable();
  }
};

struct ManyHellosRequestBuilder {
  flatbuffers::FlatBufferBuilder& fbb_;
  flatbuffers::uoffset_t start_;
  void add_name(flatbuffers::Offset<flatbuffers::String> name) {
    fbb_.AddOffset(ManyHellosRequest::VT_NAME, name);
  }
  void add_num_greetings(int32_t num_greetings) {
    fbb_.AddElement<int32_t>(ManyHellosRequest::VT_NUM_GREETINGS, num_greetings, 0);
  }
  explicit ManyHellosRequestBuilder(flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) {
    start_ = fbb_.StartTable();
  }
  ManyHellosRequestBuilder& operator=(const ManyHellosRequestBuilder&);
  flatbuffers::Offset<ManyHellosRequest> Finish() {
    const auto end = fbb_.EndTable(start_);
    auto o = flatbuffers::Offset<ManyHellosRequest>(end);
    return o;
  }
};

inline flatbuffers::Offset<ManyHellosRequest> CreateManyHellosRequest(
    flatbuffers::FlatBufferBuilder& _fbb,
    flatbuffers::Offset<flatbuffers::String> name = 0,
    int32_t num_greetings = 0) {
  ManyHellosRequestBuilder builder_(_fbb);
  builder_.add_num_greetings(num_greetings);
  builder_.add_name(name);
  return builder_.Finish();
}

inline flatbuffers::Offset<ManyHellosRequest> CreateManyHellosRequestDirect(
    flatbuffers::FlatBufferBuilder& _fbb, const char* name = nullptr,
    int32_t num_greetings = 0) {
  return CreateManyHellosRequest(_fbb, name ? _fbb.CreateString(name) : 0,
                                 num_greetings);
}

#endif  // FLATBUFFERS_GENERATED_EXAMPLE_H_
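The generated `Create*`/`Create*Direct` helpers above are the whole construction API for these tables. As a quick, self-contained illustration of how they compose (a standalone sketch, not a file from this repository; it assumes only `example_generated.h` and the FlatBuffers headers are on the include path), the following builds a `ManyHellosRequest`, verifies the finished buffer the same way the gRPC layer does for messages arriving off the wire, and reads the fields back without any unpacking:

```cpp
#include <cassert>
#include <iostream>

#include "flatbuffers/flatbuffers.h"
#include "example_generated.h"

int main()
{
    // Build a ManyHellosRequest with the generated "Direct" helper, which
    // copies the C string into the builder for us.
    flatbuffers::FlatBufferBuilder fbb;
    auto req = CreateManyHellosRequestDirect(fbb, "World", /*num_greetings=*/3);
    fbb.Finish(req);

    // Verify before trusting the buffer; the generated Verify() walks the
    // table and checks every offset and string.
    auto* msg = flatbuffers::GetRoot<ManyHellosRequest>(fbb.GetBufferPointer());
    flatbuffers::Verifier verifier(fbb.GetBufferPointer(), fbb.GetSize());
    assert(msg->Verify(verifier));

    // Access is zero-copy: fields are read directly out of the buffer.
    std::cout << msg->name()->str() << " x " << msg->num_greetings() << std::endl;
    return 0;
}
```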
================================================
FILE: examples/12_FlatBuffers/server.cc
================================================
/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <chrono>
#include <memory>

#include <gflags/gflags.h>
#include <glog/logging.h>

#include "nvrpc/executor.h"
#include "nvrpc/server.h"
#include "nvrpc/service.h"
#include "tensorrt/laboratory/core/resources.h"

#include "example.grpc.fb.h"
#include "example_generated.h"

using nvrpc::AsyncRPC;
using nvrpc::AsyncService;
using nvrpc::Context;
using nvrpc::Executor;
using nvrpc::Server;
using trtlab::Resources;

using Request = flatbuffers::grpc::Message<HelloRequest>;
using Response = flatbuffers::grpc::Message<HelloReply>;

struct SimpleResources : public Resources
{
};

class SimpleContext final : public Context<Request, Response, SimpleResources>
{
    void ExecuteRPC(Request& input, Response& output) final override
    {
        flatbuffers::grpc::MessageBuilder mb_;

        // We call GetRoot to "parse" the message. Verification is already
        // performed by default. See the notes below for more details.
        const HelloRequest* request = input.GetRoot();

        // Fields are retrieved as usual with FlatBuffers
        const std::string& name = request->name()->str();

        // `flatbuffers::grpc::MessageBuilder` is a `FlatBufferBuilder` with a
        // special allocator for efficient gRPC buffer transfer, but otherwise
        // usage is the same as usual.
        auto msg_offset = mb_.CreateString("Hello, " + name);
        auto hello_offset = CreateHelloReply(mb_, msg_offset);
        mb_.Finish(hello_offset);

        // The `ReleaseMessage()` function detaches the message from the
        // builder, so we can transfer the response to gRPC while simultaneously
        // detaching that memory buffer from the builder.
        output = mb_.ReleaseMessage<HelloReply>();
        CHECK(output.Verify());
        this->FinishResponse();
    }
};

int main(int argc, char* argv[])
{
    FLAGS_alsologtostderr = 1; // Log to console
    ::google::InitGoogleLogging("flatbuffer service");
    ::google::ParseCommandLineFlags(&argc, &argv, true);

    // A server will bind an IP:PORT to listen on
    Server server("0.0.0.0:50051");

    // A server can host multiple services
    auto simpleInference = server.RegisterAsyncService<Greeter>();

    auto rpcCompute = simpleInference->RegisterRPC<SimpleContext>(&Greeter::AsyncService::RequestSayHello);

    auto rpcResources = std::make_shared<SimpleResources>();

    auto executor = server.RegisterExecutor(new Executor(1));
    executor->RegisterContexts(rpcCompute, rpcResources, 10);

    LOG(INFO) << "Running Server";
    server.Run(std::chrono::milliseconds(2000), [] {
        // This is a timeout loop executed every 2 seconds
        // Run() with no arguments will run an empty timeout loop every 5 seconds.
        // RunAsync() will return immediately; it's your responsibility to ensure the
        // server doesn't go out of scope or a Shutdown will be triggered on your services.
    });
}
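For reference, this server is paired with a client in the same directory (`client.cc`). The sketch below is a minimal standalone reconstruction of what the unary `SayHello` call looks like from the client side, not the repository's actual client: the generated stub plus a `flatbuffers::grpc::MessageBuilder` are all that is needed. It assumes the server above is running.

```cpp
#include <iostream>

#include <grpcpp/grpcpp.h>

#include "example.grpc.fb.h"
#include "example_generated.h"

int main()
{
    // Connect to the address the server above binds to.
    auto channel = grpc::CreateChannel("localhost:50051",
                                       grpc::InsecureChannelCredentials());
    auto stub = Greeter::NewStub(channel);

    // MessageBuilder is a FlatBufferBuilder backed by a gRPC-friendly
    // allocator, so the finished buffer can be handed to gRPC without a copy.
    flatbuffers::grpc::MessageBuilder mb;
    auto name_offset = mb.CreateString("World");
    auto request_offset = CreateHelloRequest(mb, name_offset);
    mb.Finish(request_offset);
    auto request_msg = mb.ReleaseMessage<HelloRequest>();

    flatbuffers::grpc::Message<HelloReply> response_msg;
    grpc::ClientContext context;
    auto status = stub->SayHello(&context, request_msg, &response_msg);
    if (!status.ok()) {
        std::cerr << status.error_code() << ": " << status.error_message() << std::endl;
        return 1;
    }

    // As on the server side, GetRoot() gives zero-copy access to the reply.
    const HelloReply* reply = response_msg.GetRoot();
    std::cout << reply->message()->str() << std::endl;
    return 0;
}
```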
================================================
FILE: examples/30_PyTensorRT/README.md
================================================
# Python Inference Example

================================================
FILE: examples/30_PyTensorRT/client.py
================================================
#!/usr/bin/env python3
import os
import time

import numpy as np

import infer
import infer_test_utils as utils


def main():
    manager = infer.RemoteInferenceManager(hostname="localhost:50052")
    models = manager.get_models()
    print(models)

    mnist = manager.infer_runner("mnist")

    print("Input Bindings: {}".format(mnist.input_bindings()))
    print("Output Bindings: {}".format(mnist.output_bindings()))

    inputs = utils.load_inputs("/work/models/onnx/mnist-v1.3/test_data_set_0")
    expected = utils.load_outputs("/work/models/onnx/mnist-v1.3/test_data_set_0")

    start = time.process_time()
    results = [mnist.infer(Input3=input) for input in inputs]
    results = [r.get() for r in results]
    print("Compute Time: {}".format(time.process_time() - start))
    print(results)

    # for r, e in zip(results, expected):
    #     for key, val in r.items():
    #         print("Output Binding Name: {}; shape{}".format(key, val.shape))
    #         r = val.reshape((1,10))
    #         np.testing.assert_almost_equal(r, e, decimal=3)
    # models.serve()
    #mnist_model = models.get_model("mnist")
    #benchmark = infer.InferBench(models)
    #benchmark.run(mnist_model, 1, 0.1)
    #print(results)


if __name__ == "__main__":
    main()

================================================
FILE: examples/30_PyTensorRT/compute.py
================================================
#!/usr/bin/env python3
import itertools
import os
import time

import numpy as np

import trtlab
import infer_test_utils as utils


def main():
    models = trtlab.InferenceManager(max_exec_concurrency=1)
    mnist = models.register_tensorrt_engine("mnist", "/work/models/onnx/mnist-v1.3/mnist-v1.3.engine")
    models.update_resources()

    print("Input Bindings: {}".format(mnist.input_bindings()))
    print("Output Bindings: {}".format(mnist.output_bindings()))

    inputs = utils.load_inputs("/work/models/onnx/mnist-v1.3/test_data_set_0")
    expected = utils.load_outputs("/work/models/onnx/mnist-v1.3/test_data_set_0")

    start = time.process_time()
    while True:
        futures = [mnist.infer(Input3=inputs[0]) for _ in range(100)]
        results = [f.get() for f in futures]
        # while True:
        #     results = [mnist.infer(Input3=input) for input in itertools.repeat(inputs[0], 1000)]
        #     results = [r.get() for r in results]
        #     time.sleep(0.1)
        print("Compute Time: {}".format(time.process_time() - start))

    # for r, e in zip(results, expected):
    #     for key, val in r.items():
    #         print("Output Binding Name: {}; shape{}".format(key, val.shape))
    #         r = val.reshape((1,10))
    #         np.testing.assert_almost_equal(r, e, decimal=3)
    #mnist_model = models.get_model("mnist")
    #benchmark = infer.InferBench(models)
    #benchmark.run(mnist_model, 1, 0.1)
    #print(results)


if __name__ == "__main__":
    main()
================================================
FILE: examples/30_PyTensorRT/infer_test_utils.py
================================================
#!/usr/bin/env python3
import glob
import os

import onnx
from onnx import numpy_helper
from matplotlib import pyplot as plt
import numpy as np


def load_inputs(test_data_dir):
    # Load inputs
    inputs = []
    inputs_num = len(glob.glob(os.path.join(test_data_dir, 'input_*.pb')))
    for i in range(inputs_num):
        input_file = os.path.join(test_data_dir, 'input_{}.pb'.format(i))
        tensor = onnx.TensorProto()
        with open(input_file, 'rb') as f:
            tensor.ParseFromString(f.read())
        inputs.append(numpy_helper.to_array(tensor))
    return inputs


def load_outputs(test_data_dir):
    # Load reference outputs
    ref_outputs = []
    ref_outputs_num = len(glob.glob(os.path.join(test_data_dir, 'output_*.pb')))
    for i in range(ref_outputs_num):
        output_file = os.path.join(test_data_dir, 'output_{}.pb'.format(i))
        tensor = onnx.TensorProto()
        with open(output_file, 'rb') as f:
            tensor.ParseFromString(f.read())
        ref_outputs.append(numpy_helper.to_array(tensor))
    return ref_outputs


def mnist_image(data):
    two_d = (np.reshape(data, (28, 28))).astype(np.uint8)
    plt.imshow(two_d, interpolation='nearest')
    return plt


def softmax(x):
    """Compute softmax values for each set of scores in x."""
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()

================================================
FILE: examples/30_PyTensorRT/rebuild.sh
================================================
#!/bin/bash
cd /work/build/tensorrt-laboratory/python
make -j
cd /work/examples/30_PyTensorRT
if [ ! -e infer.cpython-35m-x86_64-linux-gnu.so ]; then
    ln -s /work/build/tensorrt-laboratory/python/tensorrt/infer.cpython-35m-x86_64-linux-gnu.so
fi

================================================
FILE: examples/30_PyTensorRT/server.py
================================================
#!/usr/bin/env python3
import os
import time

import numpy as np

import infer
import infer_test_utils as utils


def main():
    models = infer.InferenceManager(max_exec_concurrency=2)
    mnist = models.register_tensorrt_engine("mnist", "/work/models/onnx/mnist-v1.3/mnist-v1.3.engine")
    models.update_resources()

    print("Input Bindings: {}".format(mnist.input_bindings()))
    print("Output Bindings: {}".format(mnist.output_bindings()))

    inputs = utils.load_inputs("/work/models/onnx/mnist-v1.3/test_data_set_0")
    expected = utils.load_outputs("/work/models/onnx/mnist-v1.3/test_data_set_0")

    start = time.process_time()
    results = [mnist.infer(Input3=input) for input in inputs]
    results = [r.get() for r in results]
    print("Compute Time: {}".format(time.process_time() - start))

    for r, e in zip(results, expected):
        for key, val in r.items():
            print("Output Binding Name: {}; shape{}".format(key, val.shape))
            r = val.reshape((1,10))
            np.testing.assert_almost_equal(r, e, decimal=3)

    models.serve()
    #mnist_model = models.get_model("mnist")
    #benchmark = infer.InferBench(models)
    #benchmark.run(mnist_model, 1, 0.1)
    #print(results)


if __name__ == "__main__":
    main()

================================================
FILE: examples/90_Kubernetes/README.md
================================================
# Kubernetes

Using [Kubernetes on NVIDIA GPUs, aka KONG](https://developer.nvidia.com/kubernetes-gpu) is a great way of deploying GPU accelerated microservices. This page will act as a guide for both development and production deployment.

* For development, we will use [minikube](https://kubernetes.io/docs/setup/minikube/) to deploy a single-node Kubernetes cluster.
* For production, we will use a Kubernetes cluster installed by the [DeepOps project](https://github.com/nvidia/deepops).
## Prerequisites

* [Kubernetes v1.10.0](https://kubernetes.io)
* [NVIDIA GPU Device Plugin](https://github.com/NVIDIA/k8s-device-plugin#preparing-your-gpu-nodes)
* [Helm](https://helm.sh)
* [prometheus-operator](https://github.com/coreos/prometheus-operator)

```
helm repo add coreos https://s3-eu-west-1.amazonaws.com/coreos-charts/stable/
```

## Setup

The following packages will be installed on your Kubernetes cluster:

* [CoreOS's Prometheus Operator](https://github.com/coreos/prometheus-operator) for gathering and monitoring metrics
* [Istio v0.8](https://istio.io) for ingress and load-balancing

After the installation of those packages, we will deploy the following:

* Scalable [K8s Deployment](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/) of the [TensorRT GRPC example](../02_TensorRT_GRPC) `inference-grpc.x`
* YAIS specific instance of a Prometheus server that will scrape any Pods labeled `scrape: yais`
* Istio `Gateway` and `VirtualService` to route load-balanced traffic to our gRPC service.

## Install

At this point, you should have a Kubernetes cluster with all the prerequisites. If you use the [minikube setup](minikube/README.md) you can simply run:

```
./bootstrap-minikube.sh
```

Otherwise, you can choose to install each of the components manually.

### Prometheus Operator

Initialize Helm and install the `prometheus-operator` and `kube-prometheus`:

```
cd ../prometheus
./bootstrap.sh
cd ..
```

Monitor `kubectl get pods -n monitoring` and wait for everything to come up. Customize any settings in the [custom-settings.yml](prometheus/custom-settings.yml) file. This project exposes the Grafana server as a `NodePort` and provides a custom datasource and dashboards for YAIS metrics.

### Istio

Initialize Istio. Rendered Istio manifests, modified to expose the gateways as a `NodePort`, are provided under [istio](istio/README.md). If you are using a cloud instance, you can change this to a `LoadBalancer`.

```
kubectl create namespace istio-system
kubectl apply -f istio/istio-v1.0-minikube.yml
kubectl label namespace default istio-injection=enabled
```

### YAIS Service

```
kubectl apply -f yais-deploy.yml
```

This does the following:

* `Deployment` - launches the service and resources
* `Service` - provides access policy to the deployment pods
* `ServiceMonitor` - tells our Prometheus server to scrape YAIS metrics
* `Gateway` - ingress host, port and protocol
* `VirtualService` - routing ingress to services

### Test the Service

Use the [`devel.sh`](devel.sh) script in the project's root directory.

```
# from project root
./devel.sh
cd build/examples/02_TensorRT_GRPC
./siege.x --port 31380 --rate=1000
```

`31380` is the default `NodePort` for the Istio `ingressgateway`.

Note: If you get errors, sometimes it takes a short while before the ingress gateway is updated to reflect the routing.

### Check the Metrics

```
kubectl get svc -n monitoring | grep grafana
```

The default login is `admin/admin`. Navigate to the `YAIS` dashboard. Celebrate.

================================================
FILE: examples/90_Kubernetes/bootstrap-minikube.sh
================================================
#!/bin/bash

if ! [ -x "$(command -v helm)" ]; then
  echo 'Error: helm is not installed.' >&2
  exit 1
fi
# minikube
(cd minikube && ./bootstrap.sh)

# prometheus-operator
(cd prometheus && ./bootstrap.sh)

# istio
kubectl create namespace istio-system
kubectl apply -f istio/istio-v1.0-minikube.yml
sleep 30
kubectl label namespace default istio-injection=enabled

# deploy yais example
kubectl apply -f yais-deploy.yml

# sleep 15
# kubectl label namespace default istio-injection-

================================================
FILE: examples/90_Kubernetes/deploy/build-and-run.sh
================================================
#!/bin/bash

default_engine=/work/models/ResNet-152-b8-fp16.engine

concurrency=${YAIS_CONCURRENCY:-1}
engine=${YAIS_TRT_ENGINE:-$default_engine}

if [ "$engine" = "$default_engine" ]; then
  if [ ! -e $engine ]; then
    cd /work/models
    ./setup.py
  fi
fi

/work/build/examples/02_TensorRT_GRPC/inference-grpc.x \
  --engine=${engine} \
  --contexts=${concurrency}

================================================
FILE: examples/90_Kubernetes/devel/README.md
================================================
## Round 1: External Service

Before deploying a YAIS service with Kubernetes, we will first set up a developer environment where we execute our service in a Docker development container. We can still use our Kubernetes/Prometheus/Grafana environment to gather and visualize metrics. To do so, we will create an external service pointing at our host.

Edit `yais-devel.yml` and modify the IP address of the `Endpoints` object to point at the host machine running minikube (`sudo minikube ip`).

```
apiVersion: v1
kind: Endpoints
metadata:
  name: yais-devel
subsets:
  - addresses:
      - ip: 10.0.0.10 # <== ChangeMe
    ports:
      - name: metrics
        port: 50078
```

```
kubectl apply -f yais-devel.yml
```

This will create a Prometheus `ServiceMonitor` that scrapes the external service, i.e. the Docker development container. This is a good first step toward integrating your service into Kubernetes without having to do a full-blown deployment.

Congrats: your minikube cluster is now looking for services labeled `scrape: yais`, and when it finds one, it will automatically start scraping the port labeled `metrics`.

The final step is to bring an inference service online and to generate some load on that service. Launch the YAIS development container using the `devel.sh` script in the project's root directory. Make sure all the examples and models have been built; see [README::Quickstart](README.md#quickstart).
```
cd examples/97_SingleProcessMultiSteam
root@dgx:/work/examples/97_SingleProcessMultiSteam# ./launch_service.sh 1 1 /work/models/ResNet-50-b1-fp32.engine
I0709 10:13:41.175212 468 Server.cc:37] gRPC listening on: 0.0.0.0:50051
I0709 10:13:41.175477 468 server.cc:229] Register Service (flowers::Inference) with Server
I0709 10:13:41.175492 468 server.cc:238] Register RPC (flowers::Inference::Compute) with Service (flowers::Inference)
I0709 10:13:41.175500 468 server.cc:243] Initializing Resources for RPC (flowers::Inference::Compute)
I0709 10:13:41.273568 468 TensorRT.cc:561] -- Initialzing TensorRT Resource Manager --
I0709 10:13:41.273602 468 TensorRT.cc:562] Maximum Execution Concurrency: 1
I0709 10:13:41.273609 468 TensorRT.cc:563] Maximum Copy Concurrency: 3
I0709 10:13:42.596443 468 TensorRT.cc:628] -- Registering Model: flowers --
I0709 10:13:42.596500 468 TensorRT.cc:629] Input/Output Tensors require 591.9 KiB
I0709 10:13:42.596511 468 TensorRT.cc:630] Execution Activations require 7.8 MiB
I0709 10:13:42.604210 468 TensorRT.cc:652] -- Allocating TensorRT Resources --
I0709 10:13:42.604228 468 TensorRT.cc:653] Creating 1 TensorRT execution tokens.
I0709 10:13:42.604236 468 TensorRT.cc:654] Creating a Pool of 3 Host/Device Memory Stacks
I0709 10:13:42.604248 468 TensorRT.cc:655] Each Host Stack contains 608.0 KiB
I0709 10:13:42.604256 468 TensorRT.cc:656] Each Device Stack contains 8.5 MiB
I0709 10:13:42.604264 468 TensorRT.cc:657] Total GPU Memory: 25.5 MiB
I0709 10:13:42.606546 468 server.cc:255] Initializing Executor
I0709 10:13:42.606832 468 server.cc:259] Registering Execution Contexts for RPC (flowers::Inference::Compute) with Executor
I0709 10:13:42.606889 468 server.cc:262] Running Server
warmup with client-async.x
1000 requests in 2.60522seconds; inf/sec: 383.845
Starting a shell keeping the services and load-balancer running...
Try /work/build/examples/02_TensorRT_GRPC/siege.x --rate=2000 --port=50051
1 x /work/models/ResNet-50-b1-fp32.engine
Subshell:
```

Use `telegraf` and watch the scrape count; the `yais-devel` scraper is set to pull metrics every 2 seconds. It can take up to a minute or so before you see scraping from your k8s cluster.

```
1 x /work/models/ResNet-50-b1-fp32.engine
Subshell: telegraf -test -config /work/examples/91_Prometheus/scrape.conf
...
> exposer_bytes_transferred,host=dgx,url=http://localhost:50078/metrics counter=0 1531131559000000000 # <== watch the counter
...
```

## Round 2: Package and Deploy

TODO - We could use some community help here.

## Round 3: Optimize Deploy Container

TODO - We could use some community help here.
================================================
FILE: examples/90_Kubernetes/devel/yais-devel.yml
================================================
---
apiVersion: v1
kind: Service
metadata:
  name: yais-devel
  labels:
    app: yais-devel
spec:
  ports:
    - name: metrics
      port: 51078
      targetPort: 50078
---
apiVersion: v1
kind: Endpoints
metadata:
  name: yais-devel
subsets:
  - addresses:
      - ip: 10.0.0.10
    ports:
      - name: metrics
        port: 50078
---
# this will get scraped by the default kube-prometheus
# and the yais-metric prometheus service (if it is running)
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: yais-devel
  labels:
    scrape: yais
spec:
  selector:
    matchLabels:
      app: yais-devel
  endpoints:
    - port: metrics
      interval: 2s
      honorLabels: true

================================================
FILE: examples/90_Kubernetes/istio/README.md
================================================
# Istio

## Install

```
# Download the latest release
curl -L https://git.io/getLatestIstio | sh -

# Istio 1.0
helm template install/kubernetes/helm/istio --name istio --namespace istio-system \
  --set gateways.istio-ingressgateway.type=NodePort \
  --set gateways.istio-egressgateway.type=NodePort > istio-v1.0-minikube.yml
```

Install Istio and label the default namespace for injection. With this configuration, pods in the labeled namespace receive sidecars unless they carry the annotation shown below.

```
kubectl create namespace istio-system
kubectl apply -f istio-v1.0-minikube.yml
kubectl label namespace default istio-injection=enabled
kubectl get namespace -L istio-injection
```

The annotation used to opt a Deployment out of sidecar injection:

```
apiVersion: extensions/v1beta1
kind: Deployment
...
spec:
  template:
    metadata:
      annotations:                        # <== sidecar
        sidecar.istio.io/inject: "false"  # <== annotation
...
```

================================================
FILE: examples/90_Kubernetes/istio/rendered/istio-v0.8-minikube.yml
================================================
apiVersion: v1 kind: Namespace metadata: name: istio-system --- # Source: istio/charts/mixer/templates/configmap.yaml apiVersion: v1 kind: ConfigMap metadata: name: istio-statsd-prom-bridge namespace: istio-system labels: app: istio-statsd-prom-bridge chart: mixer-0.8.0 release: RELEASE-NAME heritage: Tiller istio: mixer data: mapping.conf: |- --- apiVersion: v1 kind: ConfigMap metadata: name: istio-mixer-custom-resources namespace: istio-system labels: app: istio-mixer chart: mixer-0.8.0 release: RELEASE-NAME heritage: Tiller istio: mixer data: custom-resources.yaml: |- apiVersion: "config.istio.io/v1alpha2" kind: attributemanifest metadata: name: istioproxy namespace: istio-system spec: attributes: origin.ip: valueType: IP_ADDRESS origin.uid: valueType: STRING origin.user: valueType: STRING request.headers: valueType: STRING_MAP request.id: valueType: STRING request.host: valueType: STRING request.method: valueType: STRING request.path: valueType: STRING request.reason: valueType: STRING request.referer: valueType: STRING request.scheme: valueType: STRING request.total_size: valueType: INT64 request.size: valueType: INT64 request.time: valueType: TIMESTAMP request.useragent: valueType: STRING response.code: valueType: INT64 response.duration: valueType: DURATION response.headers: valueType: STRING_MAP response.total_size: valueType: INT64 response.size: valueType: INT64 response.time: valueType: TIMESTAMP source.uid: valueType: STRING source.user: valueType: STRING destination.uid: valueType: STRING connection.id: valueType: STRING connection.received.bytes: valueType: INT64
connection.received.bytes_total: valueType: INT64 connection.sent.bytes: valueType: INT64 connection.sent.bytes_total: valueType: INT64 connection.duration: valueType: DURATION connection.mtls: valueType: BOOL context.protocol: valueType: STRING context.timestamp: valueType: TIMESTAMP context.time: valueType: TIMESTAMP api.service: valueType: STRING api.version: valueType: STRING api.operation: valueType: STRING api.protocol: valueType: STRING request.auth.principal: valueType: STRING request.auth.audiences: valueType: STRING request.auth.presenter: valueType: STRING request.auth.claims: valueType: STRING_MAP request.auth.raw_claims: valueType: STRING request.api_key: valueType: STRING --- apiVersion: "config.istio.io/v1alpha2" kind: attributemanifest metadata: name: kubernetes namespace: istio-system spec: attributes: source.ip: valueType: IP_ADDRESS source.labels: valueType: STRING_MAP source.name: valueType: STRING source.namespace: valueType: STRING source.service: valueType: STRING source.serviceAccount: valueType: STRING destination.ip: valueType: IP_ADDRESS destination.labels: valueType: STRING_MAP destination.name: valueType: STRING destination.namespace: valueType: STRING destination.service: valueType: STRING destination.serviceAccount: valueType: STRING --- apiVersion: "config.istio.io/v1alpha2" kind: stdio metadata: name: handler namespace: istio-system spec: outputAsJson: true --- apiVersion: "config.istio.io/v1alpha2" kind: logentry metadata: name: accesslog namespace: istio-system spec: severity: '"Info"' timestamp: request.time variables: originIp: origin.ip | ip("0.0.0.0") sourceIp: source.ip | ip("0.0.0.0") sourceService: source.service | "" sourceUser: source.user | source.uid | "" sourceNamespace: source.namespace | "" destinationIp: destination.ip | ip("0.0.0.0") destinationService: destination.service | "" destinationNamespace: destination.namespace | "" apiName: api.service | "" apiVersion: api.version | "" apiClaims: request.headers["sec-istio-auth-userinfo"]| "" apiKey: request.api_key | request.headers["x-api-key"] | "" requestOperation: api.operation | "" protocol: request.scheme | "http" method: request.method | "" url: request.path | "" responseCode: response.code | 0 responseSize: response.size | 0 requestSize: request.size | 0 latency: response.duration | "0ms" connectionMtls: connection.mtls | false userAgent: request.useragent | "" responseTimestamp: response.time receivedBytes: request.total_size | connection.received.bytes | 0 sentBytes: response.total_size | connection.sent.bytes | 0 referer: request.referer | "" monitored_resource_type: '"UNSPECIFIED"' --- apiVersion: "config.istio.io/v1alpha2" kind: rule metadata: name: stdio namespace: istio-system spec: match: "true" # If omitted match is true. 
actions: - handler: handler.stdio instances: - accesslog.logentry --- apiVersion: "config.istio.io/v1alpha2" kind: metric metadata: name: requestcount namespace: istio-system spec: value: "1" dimensions: source_service: source.service | "unknown" source_version: source.labels["version"] | "unknown" destination_service: destination.service | "unknown" destination_version: destination.labels["version"] | "unknown" response_code: response.code | 200 connection_mtls: connection.mtls | false monitored_resource_type: '"UNSPECIFIED"' --- apiVersion: "config.istio.io/v1alpha2" kind: metric metadata: name: requestduration namespace: istio-system spec: value: response.duration | "0ms" dimensions: source_service: source.service | "unknown" source_version: source.labels["version"] | "unknown" destination_service: destination.service | "unknown" destination_version: destination.labels["version"] | "unknown" response_code: response.code | 200 connection_mtls: connection.mtls | false monitored_resource_type: '"UNSPECIFIED"' --- apiVersion: "config.istio.io/v1alpha2" kind: metric metadata: name: requestsize namespace: istio-system spec: value: request.size | 0 dimensions: source_service: source.service | "unknown" source_version: source.labels["version"] | "unknown" destination_service: destination.service | "unknown" destination_version: destination.labels["version"] | "unknown" response_code: response.code | 200 connection_mtls: connection.mtls | false monitored_resource_type: '"UNSPECIFIED"' --- apiVersion: "config.istio.io/v1alpha2" kind: metric metadata: name: responsesize namespace: istio-system spec: value: response.size | 0 dimensions: source_service: source.service | "unknown" source_version: source.labels["version"] | "unknown" destination_service: destination.service | "unknown" destination_version: destination.labels["version"] | "unknown" response_code: response.code | 200 connection_mtls: connection.mtls | false monitored_resource_type: '"UNSPECIFIED"' --- apiVersion: "config.istio.io/v1alpha2" kind: metric metadata: name: tcpbytesent namespace: istio-system labels: istio-protocol: tcp # needed so that mixer will only generate when context.protocol == tcp spec: value: connection.sent.bytes | 0 dimensions: source_service: source.service | "unknown" source_version: source.labels["version"] | "unknown" destination_service: destination.service | "unknown" destination_version: destination.labels["version"] | "unknown" connection_mtls: connection.mtls | false monitored_resource_type: '"UNSPECIFIED"' --- apiVersion: "config.istio.io/v1alpha2" kind: metric metadata: name: tcpbytereceived namespace: istio-system labels: istio-protocol: tcp # needed so that mixer will only generate when context.protocol == tcp spec: value: connection.received.bytes | 0 dimensions: source_service: source.service | "unknown" source_version: source.labels["version"] | "unknown" destination_service: destination.service | "unknown" destination_version: destination.labels["version"] | "unknown" connection_mtls: connection.mtls | false monitored_resource_type: '"UNSPECIFIED"' --- apiVersion: "config.istio.io/v1alpha2" kind: prometheus metadata: name: handler namespace: istio-system spec: metrics: - name: request_count instance_name: requestcount.metric.istio-system kind: COUNTER label_names: - source_service - source_version - destination_service - destination_version - response_code - connection_mtls - name: request_duration instance_name: requestduration.metric.istio-system kind: DISTRIBUTION label_names: - source_service 
- source_version - destination_service - destination_version - response_code - connection_mtls buckets: explicit_buckets: bounds: [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10] - name: request_size instance_name: requestsize.metric.istio-system kind: DISTRIBUTION label_names: - source_service - source_version - destination_service - destination_version - response_code - connection_mtls buckets: exponentialBuckets: numFiniteBuckets: 8 scale: 1 growthFactor: 10 - name: response_size instance_name: responsesize.metric.istio-system kind: DISTRIBUTION label_names: - source_service - source_version - destination_service - destination_version - response_code - connection_mtls buckets: exponentialBuckets: numFiniteBuckets: 8 scale: 1 growthFactor: 10 - name: tcp_bytes_sent instance_name: tcpbytesent.metric.istio-system kind: COUNTER label_names: - source_service - source_version - destination_service - destination_version - connection_mtls - name: tcp_bytes_received instance_name: tcpbytereceived.metric.istio-system kind: COUNTER label_names: - source_service - source_version - destination_service - destination_version - connection_mtls --- apiVersion: "config.istio.io/v1alpha2" kind: rule metadata: name: promhttp namespace: istio-system labels: istio-protocol: http spec: actions: - handler: handler.prometheus instances: - requestcount.metric - requestduration.metric - requestsize.metric - responsesize.metric --- apiVersion: "config.istio.io/v1alpha2" kind: rule metadata: name: promtcp namespace: istio-system labels: istio-protocol: tcp # needed so that mixer will only execute when context.protocol == TCP spec: actions: - handler: handler.prometheus instances: - tcpbytesent.metric - tcpbytereceived.metric --- apiVersion: "config.istio.io/v1alpha2" kind: kubernetesenv metadata: name: handler namespace: istio-system spec: # when running from mixer root, use the following config after adding a # symbolic link to a kubernetes config file via: # # $ ln -s ~/.kube/config mixer/adapter/kubernetes/kubeconfig # # kubeconfig_path: "mixer/adapter/kubernetes/kubeconfig" --- apiVersion: "config.istio.io/v1alpha2" kind: rule metadata: name: kubeattrgenrulerule namespace: istio-system spec: actions: - handler: handler.kubernetesenv instances: - attributes.kubernetes --- apiVersion: "config.istio.io/v1alpha2" kind: rule metadata: name: tcpkubeattrgenrulerule namespace: istio-system spec: match: context.protocol == "tcp" actions: - handler: handler.kubernetesenv instances: - attributes.kubernetes --- apiVersion: "config.istio.io/v1alpha2" kind: kubernetes metadata: name: attributes namespace: istio-system spec: # Pass the required attribute data to the adapter source_uid: source.uid | "" source_ip: source.ip | ip("0.0.0.0") # default to unspecified ip addr destination_uid: destination.uid | "" origin_uid: '""' origin_ip: ip("0.0.0.0") # default to unspecified ip addr attribute_bindings: # Fill the new attributes from the adapter produced output. 
# $out refers to an instance of OutputTemplate message source.ip: $out.source_pod_ip | ip("0.0.0.0") source.labels: $out.source_labels | emptyStringMap() source.namespace: $out.source_namespace | "default" source.service: $out.source_service | "unknown" source.serviceAccount: $out.source_service_account_name | "unknown" destination.ip: $out.destination_pod_ip | ip("0.0.0.0") destination.labels: $out.destination_labels | emptyStringMap() destination.namespace: $out.destination_namespace | "default" destination.service: $out.destination_service | "unknown" destination.serviceAccount: $out.destination_service_account_name | "unknown" --- # Configuration needed by Mixer. # Mixer cluster is delivered via CDS # Specify mixer cluster settings apiVersion: networking.istio.io/v1alpha3 kind: DestinationRule metadata: name: istio-policy namespace: istio-system spec: host: istio-policy.istio-system.svc.cluster.local trafficPolicy: connectionPool: http: http2MaxRequests: 10000 maxRequestsPerConnection: 10000 --- apiVersion: networking.istio.io/v1alpha3 kind: DestinationRule metadata: name: istio-telemetry namespace: istio-system spec: host: istio-telemetry.istio-system.svc.cluster.local trafficPolicy: connectionPool: http: http2MaxRequests: 10000 maxRequestsPerConnection: 10000 --- --- # Source: istio/charts/prometheus/templates/configmap.yaml apiVersion: v1 kind: ConfigMap metadata: name: prometheus namespace: istio-system labels: app: prometheus chart: prometheus-0.1.0 release: RELEASE-NAME heritage: Tiller data: prometheus.yml: |- global: scrape_interval: 15s scrape_configs: - job_name: 'istio-mesh' # Override the global default and scrape targets from this job every 5 seconds. scrape_interval: 5s kubernetes_sd_configs: - role: endpoints relabel_configs: - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] action: keep regex: istio-system;istio-telemetry;prometheus - job_name: 'envoy' # Override the global default and scrape targets from this job every 5 seconds. scrape_interval: 5s # metrics_path defaults to '/metrics' # scheme defaults to 'http'. kubernetes_sd_configs: - role: endpoints relabel_configs: - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] action: keep regex: istio-system;istio-statsd-prom-bridge;statsd-prom - job_name: 'istio-policy' # Override the global default and scrape targets from this job every 5 seconds. scrape_interval: 5s # metrics_path defaults to '/metrics' # scheme defaults to 'http'. kubernetes_sd_configs: - role: endpoints relabel_configs: - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] action: keep regex: istio-system;istio-policy;http-monitoring - job_name: 'istio-telemetry' # Override the global default and scrape targets from this job every 5 seconds. scrape_interval: 5s # metrics_path defaults to '/metrics' # scheme defaults to 'http'. kubernetes_sd_configs: - role: endpoints relabel_configs: - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] action: keep regex: istio-system;istio-telemetry;http-monitoring - job_name: 'pilot' # Override the global default and scrape targets from this job every 5 seconds. scrape_interval: 5s # metrics_path defaults to '/metrics' # scheme defaults to 'http'. 
kubernetes_sd_configs: - role: endpoints relabel_configs: - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] action: keep regex: istio-system;istio-pilot;http-monitoring # scrape config for API servers - job_name: 'kubernetes-apiservers' kubernetes_sd_configs: - role: endpoints scheme: https tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token relabel_configs: - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] action: keep regex: default;kubernetes;https # scrape config for nodes (kubelet) - job_name: 'kubernetes-nodes' scheme: https tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token kubernetes_sd_configs: - role: node relabel_configs: - action: labelmap regex: __meta_kubernetes_node_label_(.+) - target_label: __address__ replacement: kubernetes.default.svc:443 - source_labels: [__meta_kubernetes_node_name] regex: (.+) target_label: __metrics_path__ replacement: /api/v1/nodes/${1}/proxy/metrics # Scrape config for Kubelet cAdvisor. # # This is required for Kubernetes 1.7.3 and later, where cAdvisor metrics # (those whose names begin with 'container_') have been removed from the # Kubelet metrics endpoint. This job scrapes the cAdvisor endpoint to # retrieve those metrics. # # In Kubernetes 1.7.0-1.7.2, these metrics are only exposed on the cAdvisor # HTTP endpoint; use "replacement: /api/v1/nodes/${1}:4194/proxy/metrics" # in that case (and ensure cAdvisor's HTTP server hasn't been disabled with # the --cadvisor-port=0 Kubelet flag). # # This job is not necessary and should be removed in Kubernetes 1.6 and # earlier versions, or it will cause the metrics to be scraped twice. - job_name: 'kubernetes-cadvisor' scheme: https tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token kubernetes_sd_configs: - role: node relabel_configs: - action: labelmap regex: __meta_kubernetes_node_label_(.+) - target_label: __address__ replacement: kubernetes.default.svc:443 - source_labels: [__meta_kubernetes_node_name] regex: (.+) target_label: __metrics_path__ replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor # scrape config for service endpoints. - job_name: 'kubernetes-service-endpoints' kubernetes_sd_configs: - role: endpoints relabel_configs: - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] action: keep regex: true - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] action: replace target_label: __scheme__ regex: (https?) 
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] action: replace target_label: __metrics_path__ regex: (.+) - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] action: replace target_label: __address__ regex: ([^:]+)(?::\d+)?;(\d+) replacement: $1:$2 - action: labelmap regex: __meta_kubernetes_service_label_(.+) - source_labels: [__meta_kubernetes_namespace] action: replace target_label: kubernetes_namespace - source_labels: [__meta_kubernetes_service_name] action: replace target_label: kubernetes_name # Example scrape config for pods - job_name: 'kubernetes-pods' kubernetes_sd_configs: - role: pod relabel_configs: - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] action: keep regex: true - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] action: replace target_label: __metrics_path__ regex: (.+) - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] action: replace regex: ([^:]+)(?::\d+)?;(\d+) replacement: $1:$2 target_label: __address__ - action: labelmap regex: __meta_kubernetes_pod_label_(.+) - source_labels: [__meta_kubernetes_namespace] action: replace target_label: namespace - source_labels: [__meta_kubernetes_pod_name] action: replace target_label: pod_name --- # Source: istio/templates/configmap.yaml apiVersion: v1 kind: ConfigMap metadata: name: istio namespace: istio-system labels: app: istio chart: istio-0.8.0 release: RELEASE-NAME heritage: Tiller data: mesh: |- # # Edit this list to avoid using mTLS to connect to these services. # Typically, these are control services (e.g kubernetes API server) that don't have istio sidecar # to transparently terminate mTLS authentication. # mtlsExcludedServices: ["kubernetes.default.svc.cluster.local"] # Set the following variable to true to disable policy checks by the Mixer. # Note that metrics will still be reported to the Mixer. disablePolicyChecks: false # Set enableTracing to false to disable request tracing. enableTracing: true # # To disable the mixer completely (including metrics), comment out # the following lines mixerCheckServer: istio-policy.istio-system.svc.cluster.local:15004 mixerReportServer: istio-telemetry.istio-system.svc.cluster.local:15004 # This is the ingress service name, update if you used a different name ingressService: istio-ingress # # Along with discoveryRefreshDelay, this setting determines how # frequently should Envoy fetch and update its internal configuration # from istio Pilot. Lower refresh delay results in higher CPU # utilization and potential performance loss in exchange for faster # convergence. Tweak this value according to your setup. rdsRefreshDelay: 10s # defaultConfig: # NOTE: If you change any values in this section, make sure to make # the same changes in start up args in istio-ingress pods. # See rdsRefreshDelay for explanation about this setting. discoveryRefreshDelay: 10s # # TCP connection timeout between Envoy & the application, and between Envoys. connectTimeout: 10s # ### ADVANCED SETTINGS ############# # Where should envoy's configuration be stored in the istio-proxy container configPath: "/etc/istio/proxy" binaryPath: "/usr/local/bin/envoy" # The pseudo service name used for Envoy. serviceCluster: istio-proxy # These settings that determine how long an old Envoy # process should be kept alive after an occasional reload. drainDuration: 45s parentShutdownDuration: 1m0s # # The mode used to redirect inbound connections to Envoy. 
This setting # has no effect on outbound traffic: iptables REDIRECT is always used for # outbound connections. # If "REDIRECT", use iptables REDIRECT to NAT and redirect to Envoy. # The "REDIRECT" mode loses source addresses during redirection. # If "TPROXY", use iptables TPROXY to redirect to Envoy. # The "TPROXY" mode preserves both the source and destination IP # addresses and ports, so that they can be used for advanced filtering # and manipulation. # The "TPROXY" mode also configures the sidecar to run with the # CAP_NET_ADMIN capability, which is required to use TPROXY. #interceptionMode: REDIRECT # # Port where Envoy listens (on local host) for admin commands # You can exec into the istio-proxy container in a pod and # curl the admin port (curl http://localhost:15000/) to obtain # diagnostic information from Envoy. See # https://lyft.github.io/envoy/docs/operations/admin.html # for more details proxyAdminPort: 15000 # # Zipkin trace collector zipkinAddress: zipkin.istio-system:9411 # # Statsd metrics collector converts statsd metrics into Prometheus metrics. statsdUdpAddress: istio-statsd-prom-bridge.istio-system:9125 # # Mutual TLS authentication between sidecars and istio control plane. controlPlaneAuthPolicy: NONE # # Address where istio Pilot service is running discoveryAddress: istio-pilot.istio-system:15007 --- # Source: istio/templates/sidecar-injector-configmap.yaml apiVersion: v1 kind: ConfigMap metadata: name: istio-sidecar-injector namespace: istio-system labels: app: istio chart: istio-0.8.0 release: RELEASE-NAME heritage: Tiller istio: sidecar-injector data: config: |- policy: enabled template: |- initContainers: - name: istio-init image: docker.io/istio/proxy_init:0.8.0 args: - "-p" - [[ .MeshConfig.ProxyListenPort ]] - "-u" - 1337 - "-m" - [[ or (index .ObjectMeta.Annotations "sidecar.istio.io/interceptionMode") .ProxyConfig.InterceptionMode.String ]] - "-i" [[ if (isset .ObjectMeta.Annotations "traffic.sidecar.istio.io/includeOutboundIPRanges") -]] - "[[ index .ObjectMeta.Annotations "traffic.sidecar.istio.io/includeOutboundIPRanges" ]]" [[ else -]] - "*" [[ end -]] - "-x" [[ if (isset .ObjectMeta.Annotations "traffic.sidecar.istio.io/excludeOutboundIPRanges") -]] - "[[ index .ObjectMeta.Annotations "traffic.sidecar.istio.io/excludeOutboundIPRanges" ]]" [[ else -]] - "" [[ end -]] - "-b" [[ if (isset .ObjectMeta.Annotations "traffic.sidecar.istio.io/includeInboundPorts") -]] - "[[ index .ObjectMeta.Annotations "traffic.sidecar.istio.io/includeInboundPorts" ]]" [[ else -]] - [[ range .Spec.Containers -]][[ range .Ports -]][[ .ContainerPort -]], [[ end -]][[ end -]][[ end]] - "-d" [[ if (isset .ObjectMeta.Annotations "traffic.sidecar.istio.io/excludeInboundPorts") -]] - "[[ index .ObjectMeta.Annotations "traffic.sidecar.istio.io/excludeInboundPorts" ]]" [[ else -]] - "" [[ end -]] imagePullPolicy: IfNotPresent securityContext: capabilities: add: - NET_ADMIN privileged: true restartPolicy: Always containers: - name: istio-proxy image: [[ if (isset .ObjectMeta.Annotations "sidecar.istio.io/proxyImage") -]] "[[ index .ObjectMeta.Annotations "sidecar.istio.io/proxyImage" ]]" [[ else -]] docker.io/istio/proxyv2:0.8.0 [[ end -]] args: - proxy - sidecar - --configPath - [[ .ProxyConfig.ConfigPath ]] - --binaryPath - [[ .ProxyConfig.BinaryPath ]] - --serviceCluster [[ if ne "" (index .ObjectMeta.Labels "app") -]] - [[ index .ObjectMeta.Labels "app" ]] [[ else -]] - "istio-proxy" [[ end -]] - --drainDuration - [[ formatDuration .ProxyConfig.DrainDuration ]] - 
--parentShutdownDuration - [[ formatDuration .ProxyConfig.ParentShutdownDuration ]] - --discoveryAddress - [[ .ProxyConfig.DiscoveryAddress ]] - --discoveryRefreshDelay - [[ formatDuration .ProxyConfig.DiscoveryRefreshDelay ]] - --zipkinAddress - [[ .ProxyConfig.ZipkinAddress ]] - --connectTimeout - [[ formatDuration .ProxyConfig.ConnectTimeout ]] - --statsdUdpAddress - [[ .ProxyConfig.StatsdUdpAddress ]] - --proxyAdminPort - [[ .ProxyConfig.ProxyAdminPort ]] - --controlPlaneAuthPolicy - [[ .ProxyConfig.ControlPlaneAuthPolicy ]] env: - name: POD_NAME valueFrom: fieldRef: fieldPath: metadata.name - name: POD_NAMESPACE valueFrom: fieldRef: fieldPath: metadata.namespace - name: INSTANCE_IP valueFrom: fieldRef: fieldPath: status.podIP - name: ISTIO_META_POD_NAME valueFrom: fieldRef: fieldPath: metadata.name - name: ISTIO_META_INTERCEPTION_MODE value: [[ or (index .ObjectMeta.Annotations "sidecar.istio.io/interceptionMode") .ProxyConfig.InterceptionMode.String ]] imagePullPolicy: IfNotPresent securityContext: privileged: false readOnlyRootFilesystem: true [[ if eq (or (index .ObjectMeta.Annotations "sidecar.istio.io/interceptionMode") .ProxyConfig.InterceptionMode.String) "TPROXY" -]] capabilities: add: - NET_ADMIN [[ else -]] runAsUser: 1337 [[ end -]] restartPolicy: Always resources: requests: cpu: 100m memory: 128Mi volumeMounts: - mountPath: /etc/istio/proxy name: istio-envoy - mountPath: /etc/certs/ name: istio-certs readOnly: true volumes: - emptyDir: medium: Memory name: istio-envoy - name: istio-certs secret: optional: true [[ if eq .Spec.ServiceAccountName "" -]] secretName: istio.default [[ else -]] secretName: [[ printf "istio.%s" .Spec.ServiceAccountName ]] [[ end -]] --- # Source: istio/charts/egressgateway/templates/serviceaccount.yaml apiVersion: v1 kind: ServiceAccount metadata: name: istio-egressgateway-service-account namespace: istio-system labels: app: egressgateway chart: egressgateway-0.8.0 heritage: Tiller release: RELEASE-NAME --- # Source: istio/charts/ingressgateway/templates/serviceaccount.yaml apiVersion: v1 kind: ServiceAccount metadata: name: istio-ingressgateway-service-account namespace: istio-system labels: app: ingressgateway chart: ingressgateway-0.8.0 heritage: Tiller release: RELEASE-NAME --- # Source: istio/charts/mixer/templates/create-custom-resources-job.yaml apiVersion: v1 kind: ServiceAccount metadata: name: istio-mixer-post-install-account namespace: istio-system labels: app: mixer chart: mixer-0.8.0 heritage: Tiller release: RELEASE-NAME --- apiVersion: rbac.authorization.k8s.io/v1beta1 kind: ClusterRole metadata: name: istio-mixer-post-install-istio-system namespace: istio-system labels: app: mixer chart: mixer-0.8.0 heritage: Tiller release: RELEASE-NAME rules: - apiGroups: ["config.istio.io"] # istio CRD watcher resources: ["*"] verbs: ["create", "get", "list", "watch", "patch"] - apiGroups: ["networking.istio.io"] # needed to create mixer destination rules resources: ["*"] verbs: ["*"] - apiGroups: ["apiextensions.k8s.io"] resources: ["customresourcedefinitions"] verbs: ["get", "list", "watch"] - apiGroups: [""] resources: ["configmaps", "endpoints", "pods", "services", "namespaces", "secrets"] verbs: ["get", "list", "watch"] --- apiVersion: rbac.authorization.k8s.io/v1beta1 kind: ClusterRoleBinding metadata: name: istio-mixer-post-install-role-binding-istio-system labels: app: mixer chart: mixer-0.8.0 heritage: Tiller release: RELEASE-NAME roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole name: istio-mixer-post-install-istio-system 
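# (For reference: the sidecar-injector template in the ConfigMap above is a Go
# template rendered per pod -- the [[ ]] delimiters are left intact by Helm --
# and per-pod annotations override its defaults. A minimal, hypothetical
# workload pod could opt out of injection or tune the init container like so;
# the IP range and image values below are illustrative only:
#
#   metadata:
#     annotations:
#       sidecar.istio.io/inject: "false"
#       sidecar.istio.io/proxyImage: docker.io/istio/proxyv2:0.8.0
#       traffic.sidecar.istio.io/includeOutboundIPRanges: "10.0.0.0/8"
# )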
subjects: - kind: ServiceAccount name: istio-mixer-post-install-account namespace: istio-system --- apiVersion: batch/v1 kind: Job metadata: name: istio-mixer-post-install namespace: istio-system annotations: "helm.sh/hook": post-install "helm.sh/hook-delete-policy": before-hook-creation labels: app: mixer chart: mixer-0.8.0 release: RELEASE-NAME heritage: Tiller spec: template: metadata: name: istio-mixer-post-install labels: app: mixer release: RELEASE-NAME spec: serviceAccountName: istio-mixer-post-install-account containers: - name: hyperkube image: "quay.io/coreos/hyperkube:v1.7.6_coreos.0" command: - ./kubectl - apply - -f - /tmp/mixer/custom-resources.yaml volumeMounts: - mountPath: "/tmp/mixer" name: tmp-configmap-mixer volumes: - name: tmp-configmap-mixer configMap: name: istio-mixer-custom-resources restartPolicy: Never # CRD might take some time till they are available to consume --- # Source: istio/charts/mixer/templates/serviceaccount.yaml apiVersion: v1 kind: ServiceAccount metadata: name: istio-mixer-service-account namespace: istio-system labels: app: mixer chart: mixer-0.8.0 heritage: Tiller release: RELEASE-NAME --- # Source: istio/charts/pilot/templates/serviceaccount.yaml apiVersion: v1 kind: ServiceAccount metadata: name: istio-pilot-service-account namespace: istio-system labels: app: istio-pilot chart: pilot-0.8.0 heritage: Tiller release: RELEASE-NAME --- # Source: istio/charts/prometheus/templates/serviceaccount.yaml apiVersion: v1 kind: ServiceAccount metadata: name: prometheus namespace: istio-system --- # Source: istio/charts/security/templates/serviceaccount.yaml apiVersion: v1 kind: ServiceAccount metadata: name: istio-citadel-service-account namespace: istio-system labels: app: security chart: security-0.8.0 heritage: Tiller release: RELEASE-NAME --- apiVersion: v1 kind: ServiceAccount metadata: name: istio-cleanup-old-ca-service-account namespace: istio-system labels: app: security chart: security-0.8.0 heritage: Tiller release: RELEASE-NAME --- # Source: istio/charts/sidecarInjectorWebhook/templates/serviceaccount.yaml apiVersion: v1 kind: ServiceAccount metadata: name: istio-sidecar-injector-service-account namespace: istio-system labels: app: istio-sidecar-injector chart: sidecarInjectorWebhook-0.8.0 heritage: Tiller release: RELEASE-NAME --- # Source: istio/charts/mixer/templates/crds.yaml # Mixer CRDs kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: rules.config.istio.io labels: app: mixer package: istio.io.mixer istio: core spec: group: config.istio.io names: kind: rule plural: rules singular: rule scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: attributemanifests.config.istio.io labels: app: mixer package: istio.io.mixer istio: core spec: group: config.istio.io names: kind: attributemanifest plural: attributemanifests singular: attributemanifest scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: circonuses.config.istio.io labels: app: mixer package: circonus istio: mixer-adapter spec: group: config.istio.io names: kind: circonus plural: circonuses singular: circonus scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: deniers.config.istio.io labels: app: mixer package: denier istio: mixer-adapter spec: group: config.istio.io names: kind: denier plural: deniers singular: denier scope: 
Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: fluentds.config.istio.io labels: app: mixer package: fluentd istio: mixer-adapter spec: group: config.istio.io names: kind: fluentd plural: fluentds singular: fluentd scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: kubernetesenvs.config.istio.io labels: app: mixer package: kubernetesenv istio: mixer-adapter spec: group: config.istio.io names: kind: kubernetesenv plural: kubernetesenvs singular: kubernetesenv scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: listcheckers.config.istio.io labels: app: mixer package: listchecker istio: mixer-adapter spec: group: config.istio.io names: kind: listchecker plural: listcheckers singular: listchecker scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: memquotas.config.istio.io labels: app: mixer package: memquota istio: mixer-adapter spec: group: config.istio.io names: kind: memquota plural: memquotas singular: memquota scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: noops.config.istio.io labels: app: mixer package: noop istio: mixer-adapter spec: group: config.istio.io names: kind: noop plural: noops singular: noop scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: opas.config.istio.io labels: app: mixer package: opa istio: mixer-adapter spec: group: config.istio.io names: kind: opa plural: opas singular: opa scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: prometheuses.config.istio.io labels: app: mixer package: prometheus istio: mixer-adapter spec: group: config.istio.io names: kind: prometheus plural: prometheuses singular: prometheus scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: rbacs.config.istio.io labels: app: mixer package: rbac istio: mixer-adapter spec: group: config.istio.io names: kind: rbac plural: rbacs singular: rbac scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: servicecontrols.config.istio.io labels: app: mixer package: servicecontrol istio: mixer-adapter spec: group: config.istio.io names: kind: servicecontrol plural: servicecontrols singular: servicecontrol scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: solarwindses.config.istio.io labels: app: mixer package: solarwinds istio: mixer-adapter spec: group: config.istio.io names: kind: solarwinds plural: solarwindses singular: solarwinds scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: stackdrivers.config.istio.io labels: app: mixer package: stackdriver istio: mixer-adapter spec: group: config.istio.io names: kind: stackdriver plural: stackdrivers singular: stackdriver scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: statsds.config.istio.io labels: app: mixer package: statsd istio: mixer-adapter spec: group: 
config.istio.io names: kind: statsd plural: statsds singular: statsd scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: stdios.config.istio.io labels: app: mixer package: stdio istio: mixer-adapter spec: group: config.istio.io names: kind: stdio plural: stdios singular: stdio scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: apikeys.config.istio.io labels: app: mixer package: apikey istio: mixer-instance spec: group: config.istio.io names: kind: apikey plural: apikeys singular: apikey scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: authorizations.config.istio.io labels: app: mixer package: authorization istio: mixer-instance spec: group: config.istio.io names: kind: authorization plural: authorizations singular: authorization scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: checknothings.config.istio.io labels: app: mixer package: checknothing istio: mixer-instance spec: group: config.istio.io names: kind: checknothing plural: checknothings singular: checknothing scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: kuberneteses.config.istio.io labels: app: mixer package: adapter.template.kubernetes istio: mixer-instance spec: group: config.istio.io names: kind: kubernetes plural: kuberneteses singular: kubernetes scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: listentries.config.istio.io labels: app: mixer package: listentry istio: mixer-instance spec: group: config.istio.io names: kind: listentry plural: listentries singular: listentry scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: logentries.config.istio.io labels: app: mixer package: logentry istio: mixer-instance spec: group: config.istio.io names: kind: logentry plural: logentries singular: logentry scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: metrics.config.istio.io labels: app: mixer package: metric istio: mixer-instance spec: group: config.istio.io names: kind: metric plural: metrics singular: metric scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: quotas.config.istio.io labels: app: mixer package: quota istio: mixer-instance spec: group: config.istio.io names: kind: quota plural: quotas singular: quota scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: reportnothings.config.istio.io labels: app: mixer package: reportnothing istio: mixer-instance spec: group: config.istio.io names: kind: reportnothing plural: reportnothings singular: reportnothing scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: servicecontrolreports.config.istio.io labels: app: mixer package: servicecontrolreport istio: mixer-instance spec: group: config.istio.io names: kind: servicecontrolreport plural: servicecontrolreports singular: servicecontrolreport scope: Namespaced version: v1alpha2 --- kind: 
CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: tracespans.config.istio.io labels: app: mixer package: tracespan istio: mixer-instance spec: group: config.istio.io names: kind: tracespan plural: tracespans singular: tracespan scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: serviceroles.config.istio.io labels: app: mixer package: istio.io.mixer istio: rbac spec: group: config.istio.io names: kind: ServiceRole plural: serviceroles singular: servicerole scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: servicerolebindings.config.istio.io labels: app: mixer package: istio.io.mixer istio: rbac spec: group: config.istio.io names: kind: ServiceRoleBinding plural: servicerolebindings singular: servicerolebinding scope: Namespaced version: v1alpha2 --- # Source: istio/charts/pilot/templates/crds.yaml apiVersion: apiextensions.k8s.io/v1beta1 kind: CustomResourceDefinition metadata: name: destinationpolicies.config.istio.io labels: app: istio-pilot spec: group: config.istio.io names: kind: DestinationPolicy listKind: DestinationPolicyList plural: destinationpolicies singular: destinationpolicy scope: Namespaced version: v1alpha2 --- apiVersion: apiextensions.k8s.io/v1beta1 kind: CustomResourceDefinition metadata: name: egressrules.config.istio.io labels: app: istio-pilot spec: group: config.istio.io names: kind: EgressRule listKind: EgressRuleList plural: egressrules singular: egressrule scope: Namespaced version: v1alpha2 --- apiVersion: apiextensions.k8s.io/v1beta1 kind: CustomResourceDefinition metadata: name: routerules.config.istio.io labels: app: istio-pilot spec: group: config.istio.io names: kind: RouteRule listKind: RouteRuleList plural: routerules singular: routerule scope: Namespaced version: v1alpha2 --- apiVersion: apiextensions.k8s.io/v1beta1 kind: CustomResourceDefinition metadata: name: virtualservices.networking.istio.io labels: app: istio-pilot spec: group: networking.istio.io names: kind: VirtualService listKind: VirtualServiceList plural: virtualservices singular: virtualservice scope: Namespaced version: v1alpha3 --- apiVersion: apiextensions.k8s.io/v1beta1 kind: CustomResourceDefinition metadata: name: destinationrules.networking.istio.io labels: app: istio-pilot spec: group: networking.istio.io names: kind: DestinationRule listKind: DestinationRuleList plural: destinationrules singular: destinationrule scope: Namespaced version: v1alpha3 --- apiVersion: apiextensions.k8s.io/v1beta1 kind: CustomResourceDefinition metadata: name: serviceentries.networking.istio.io labels: app: istio-pilot spec: group: networking.istio.io names: kind: ServiceEntry listKind: ServiceEntryList plural: serviceentries singular: serviceentry scope: Namespaced version: v1alpha3 --- apiVersion: apiextensions.k8s.io/v1beta1 kind: CustomResourceDefinition metadata: name: gateways.networking.istio.io labels: app: istio-pilot spec: group: networking.istio.io names: kind: Gateway plural: gateways singular: gateway scope: Namespaced version: v1alpha3 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: policies.authentication.istio.io spec: group: authentication.istio.io names: kind: Policy plural: policies singular: policy scope: Namespaced version: v1alpha1 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: httpapispecbindings.config.istio.io 
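# (For reference: the Mixer CRDs in this file are consumed as namespaced custom
# resources. A minimal, hypothetical adapter/instance/rule triple using the
# denier, checknothing, and rule kinds registered above -- the names and the
# match expression are illustrative only:
#
#   apiVersion: config.istio.io/v1alpha2
#   kind: denier
#   metadata:
#     name: denyall
#   spec:
#     status:
#       code: 7
#       message: Not allowed
#   ---
#   apiVersion: config.istio.io/v1alpha2
#   kind: checknothing
#   metadata:
#     name: denyrequest
#   spec:
#   ---
#   apiVersion: config.istio.io/v1alpha2
#   kind: rule
#   metadata:
#     name: denyv3
#   spec:
#     match: destination.labels["version"] == "v3"
#     actions:
#     - handler: denyall.denier
#       instances:
#       - denyrequest.checknothing
# )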
spec: group: config.istio.io names: kind: HTTPAPISpecBinding plural: httpapispecbindings singular: httpapispecbinding scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: httpapispecs.config.istio.io spec: group: config.istio.io names: kind: HTTPAPISpec plural: httpapispecs singular: httpapispec scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: quotaspecbindings.config.istio.io spec: group: config.istio.io names: kind: QuotaSpecBinding plural: quotaspecbindings singular: quotaspecbinding scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: quotaspecs.config.istio.io spec: group: config.istio.io names: kind: QuotaSpec plural: quotaspecs singular: quotaspec scope: Namespaced version: v1alpha2 --- # Source: istio/charts/mixer/templates/clusterrole.yaml apiVersion: rbac.authorization.k8s.io/v1beta1 kind: ClusterRole metadata: name: istio-mixer-istio-system namespace: istio-system labels: app: mixer chart: mixer-0.8.0 heritage: Tiller release: RELEASE-NAME rules: - apiGroups: ["config.istio.io"] # istio CRD watcher resources: ["*"] verbs: ["create", "get", "list", "watch", "patch"] - apiGroups: ["apiextensions.k8s.io"] resources: ["customresourcedefinitions"] verbs: ["get", "list", "watch"] - apiGroups: [""] resources: ["configmaps", "endpoints", "pods", "services", "namespaces", "secrets"] verbs: ["get", "list", "watch"] --- # Source: istio/charts/pilot/templates/clusterrole.yaml apiVersion: rbac.authorization.k8s.io/v1beta1 kind: ClusterRole metadata: name: istio-pilot-istio-system namespace: istio-system labels: app: istio-pilot chart: pilot-0.8.0 heritage: Tiller release: RELEASE-NAME rules: - apiGroups: ["config.istio.io"] resources: ["*"] verbs: ["*"] - apiGroups: ["networking.istio.io"] resources: ["*"] verbs: ["*"] - apiGroups: ["authentication.istio.io"] resources: ["*"] verbs: ["*"] - apiGroups: ["apiextensions.k8s.io"] resources: ["customresourcedefinitions"] verbs: ["*"] - apiGroups: ["extensions"] resources: ["thirdpartyresources", "thirdpartyresources.extensions", "ingresses", "ingresses/status"] verbs: ["*"] - apiGroups: [""] resources: ["configmaps"] verbs: ["create", "get", "list", "watch", "update"] - apiGroups: [""] resources: ["endpoints", "pods", "services"] verbs: ["get", "list", "watch"] - apiGroups: [""] resources: ["namespaces", "nodes", "secrets"] verbs: ["get", "list", "watch"] --- # Source: istio/charts/prometheus/templates/clusterrole.yaml --- apiVersion: rbac.authorization.k8s.io/v1beta1 kind: ClusterRole metadata: name: prometheus-istio-system namespace: istio-system rules: - apiGroups: [""] resources: - nodes - services - endpoints - pods - nodes/proxy verbs: ["get", "list", "watch"] - apiGroups: [""] resources: - configmaps verbs: ["get"] - nonResourceURLs: ["/metrics"] verbs: ["get"] --- apiVersion: rbac.authorization.k8s.io/v1beta1 kind: ClusterRoleBinding metadata: name: prometheus-istio-system namespace: istio-system roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole name: prometheus-istio-system subjects: - kind: ServiceAccount name: prometheus namespace: istio-system --- --- # Source: istio/charts/security/templates/clusterrole.yaml apiVersion: rbac.authorization.k8s.io/v1beta1 kind: ClusterRole metadata: name: istio-citadel-istio-system namespace: istio-system labels: app: security chart: security-0.8.0 heritage: Tiller 
release: RELEASE-NAME rules: - apiGroups: [""] resources: ["secrets"] verbs: ["create", "get", "watch", "list", "update", "delete"] - apiGroups: [""] resources: ["serviceaccounts"] verbs: ["get", "watch", "list"] - apiGroups: [""] resources: ["services"] verbs: ["get", "watch", "list"] --- apiVersion: rbac.authorization.k8s.io/v1beta1 kind: Role metadata: name: istio-cleanup-old-ca-istio-system namespace: istio-system labels: app: security chart: security-0.8.0 heritage: Tiller release: RELEASE-NAME rules: - apiGroups: [""] resources: ["deployments", "serviceaccounts", "services"] verbs: ["get", "delete"] - apiGroups: ["extensions"] resources: ["deployments", "replicasets"] verbs: ["get", "list", "update", "delete"] --- # Source: istio/charts/sidecarInjectorWebhook/templates/clusterrole.yaml apiVersion: rbac.authorization.k8s.io/v1beta1 kind: ClusterRole metadata: name: istio-sidecar-injector-istio-system labels: app: istio-sidecar-injector chart: sidecarInjectorWebhook-0.8.0 heritage: Tiller release: RELEASE-NAME rules: - apiGroups: ["*"] resources: ["configmaps"] verbs: ["get", "list", "watch"] - apiGroups: ["admissionregistration.k8s.io"] resources: ["mutatingwebhookconfigurations"] verbs: ["get", "list", "watch", "patch"] --- # Source: istio/charts/mixer/templates/clusterrolebinding.yaml apiVersion: rbac.authorization.k8s.io/v1beta1 kind: ClusterRoleBinding metadata: name: istio-mixer-admin-role-binding-istio-system labels: app: mixer chart: mixer-0.8.0 heritage: Tiller release: RELEASE-NAME roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole name: istio-mixer-istio-system subjects: - kind: ServiceAccount name: istio-mixer-service-account namespace: istio-system --- # Source: istio/charts/pilot/templates/clusterrolebinding.yaml apiVersion: rbac.authorization.k8s.io/v1beta1 kind: ClusterRoleBinding metadata: name: istio-pilot-istio-system labels: app: istio-pilot chart: pilot-0.8.0 heritage: Tiller release: RELEASE-NAME roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole name: istio-pilot-istio-system subjects: - kind: ServiceAccount name: istio-pilot-service-account namespace: istio-system --- # Source: istio/charts/security/templates/clusterrolebinding.yaml apiVersion: rbac.authorization.k8s.io/v1beta1 kind: ClusterRoleBinding metadata: name: istio-citadel-istio-system labels: app: security chart: security-0.8.0 heritage: Tiller release: RELEASE-NAME roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole name: istio-citadel-istio-system subjects: - kind: ServiceAccount name: istio-citadel-service-account namespace: istio-system --- apiVersion: rbac.authorization.k8s.io/v1beta1 kind: RoleBinding metadata: name: istio-cleanup-old-ca-istio-system namespace: istio-system labels: app: security chart: security-0.8.0 heritage: Tiller release: RELEASE-NAME roleRef: apiGroup: rbac.authorization.k8s.io kind: Role name: istio-cleanup-old-ca-istio-system subjects: - kind: ServiceAccount name: istio-cleanup-old-ca-service-account namespace: istio-system --- # Source: istio/charts/sidecarInjectorWebhook/templates/clusterrolebinding.yaml apiVersion: rbac.authorization.k8s.io/v1beta1 kind: ClusterRoleBinding metadata: name: istio-sidecar-injector-admin-role-binding-istio-system labels: app: istio-sidecar-injector chart: sidecarInjectorWebhook-0.8.0 heritage: Tiller release: RELEASE-NAME roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole name: istio-sidecar-injector-istio-system subjects: - kind: ServiceAccount name: istio-sidecar-injector-service-account 
namespace: istio-system --- # Source: istio/charts/egressgateway/templates/service.yaml apiVersion: v1 kind: Service metadata: name: istio-egressgateway namespace: istio-system labels: chart: egressgateway-0.8.0 release: RELEASE-NAME heritage: Tiller istio: egressgateway spec: type: ClusterIP selector: istio: egressgateway ports: - name: http port: 80 - name: https port: 443 --- # Source: istio/charts/grafana/templates/service.yaml apiVersion: v1 kind: Service metadata: name: grafana namespace: istio-system annotations: auth.istio.io/3000: NONE labels: app: grafana chart: grafana-0.1.0 release: RELEASE-NAME heritage: Tiller spec: type: ClusterIP ports: - port: 3000 targetPort: 3000 protocol: TCP name: http selector: app: grafana --- # Source: istio/charts/ingressgateway/templates/service.yaml apiVersion: v1 kind: Service metadata: name: istio-ingressgateway namespace: istio-system labels: chart: ingressgateway-0.8.0 release: RELEASE-NAME heritage: Tiller istio: ingressgateway spec: type: NodePort selector: istio: ingressgateway ports: - name: http nodePort: 31380 port: 80 - name: https nodePort: 31390 port: 443 - name: tcp nodePort: 31400 port: 31400 --- # Source: istio/charts/mixer/templates/service.yaml apiVersion: v1 kind: Service metadata: name: istio-policy namespace: istio-system labels: chart: mixer-0.8.0 release: RELEASE-NAME istio: mixer spec: ports: - name: grpc-mixer port: 9091 - name: grpc-mixer-mtls port: 15004 - name: http-monitoring port: 9093 selector: istio: mixer istio-mixer-type: policy --- apiVersion: v1 kind: Service metadata: name: istio-telemetry namespace: istio-system labels: chart: mixer-0.8.0 release: RELEASE-NAME istio: mixer spec: ports: - name: grpc-mixer port: 9091 - name: grpc-mixer-mtls port: 15004 - name: http-monitoring port: 9093 - name: prometheus port: 42422 selector: istio: mixer istio-mixer-type: telemetry --- --- # Source: istio/charts/mixer/templates/statsdtoprom.yaml --- apiVersion: v1 kind: Service metadata: name: istio-statsd-prom-bridge namespace: istio-system labels: chart: mixer-0.8.0 release: RELEASE-NAME istio: statsd-prom-bridge spec: ports: - name: statsd-prom port: 9102 - name: statsd-udp port: 9125 protocol: UDP selector: istio: statsd-prom-bridge --- apiVersion: extensions/v1beta1 kind: Deployment metadata: name: istio-statsd-prom-bridge namespace: istio-system labels: chart: mixer-0.8.0 release: RELEASE-NAME istio: mixer spec: template: metadata: labels: istio: statsd-prom-bridge annotations: sidecar.istio.io/inject: "false" spec: serviceAccountName: istio-mixer-service-account volumes: - name: config-volume configMap: name: istio-statsd-prom-bridge containers: - name: statsd-prom-bridge image: "prom/statsd-exporter:latest" imagePullPolicy: IfNotPresent ports: - containerPort: 9102 - containerPort: 9125 protocol: UDP args: - '-statsd.mapping-config=/etc/statsd/mapping.conf' resources: {} volumeMounts: - name: config-volume mountPath: /etc/statsd --- # Source: istio/charts/pilot/templates/service.yaml apiVersion: v1 kind: Service metadata: name: istio-pilot namespace: istio-system labels: app: istio-pilot chart: pilot-0.8.0 release: RELEASE-NAME heritage: Tiller spec: ports: - port: 15003 name: http-old-discovery # mTLS or non-mTLS depending on auth setting - port: 15005 name: https-discovery # always mTLS - port: 15007 name: http-discovery # always plain-text - port: 15010 name: grpc-xds # direct - port: 15011 name: https-xds # mTLS - port: 8080 name: http-legacy-discovery # direct - port: 9093 name: http-monitoring selector: istio: 
pilot --- # Source: istio/charts/prometheus/templates/service.yaml apiVersion: v1 kind: Service metadata: name: prometheus namespace: istio-system annotations: prometheus.io/scrape: 'true' labels: name: prometheus spec: selector: app: prometheus ports: - name: http-prometheus protocol: TCP port: 9090 --- # Source: istio/charts/security/templates/service.yaml apiVersion: v1 kind: Service metadata: # we use the normal name here (e.g. 'prometheus') # as grafana is configured to use this as a data source name: istio-citadel namespace: istio-system labels: app: istio-citadel spec: ports: - name: grpc-citadel port: 8060 targetPort: 8060 protocol: TCP - name: http-monitoring port: 9093 selector: istio: citadel --- # Source: istio/charts/servicegraph/templates/service.yaml apiVersion: v1 kind: Service metadata: name: servicegraph namespace: istio-system labels: app: servicegraph chart: servicegraph-0.1.0 release: RELEASE-NAME heritage: Tiller spec: type: ClusterIP ports: - port: 8088 targetPort: 8088 protocol: TCP name: http selector: app: servicegraph --- # Source: istio/charts/sidecarInjectorWebhook/templates/service.yaml apiVersion: v1 kind: Service metadata: name: istio-sidecar-injector namespace: istio-system labels: istio: sidecar-injector spec: ports: - port: 443 selector: istio: sidecar-injector --- # Source: istio/charts/egressgateway/templates/deployment.yaml apiVersion: extensions/v1beta1 kind: Deployment metadata: name: istio-egressgateway namespace: istio-system labels: app: egressgateway chart: egressgateway-0.8.0 release: RELEASE-NAME heritage: Tiller istio: egressgateway spec: replicas: template: metadata: labels: istio: egressgateway annotations: sidecar.istio.io/inject: "false" spec: serviceAccountName: istio-egressgateway-service-account containers: - name: egressgateway image: "docker.io/istio/proxyv2:0.8.0" imagePullPolicy: IfNotPresent ports: - containerPort: 80 - containerPort: 443 args: - proxy - router - -v - "2" - --discoveryRefreshDelay - '1s' #discoveryRefreshDelay - --drainDuration - '45s' #drainDuration - --parentShutdownDuration - '1m0s' #parentShutdownDuration - --connectTimeout - '10s' #connectTimeout - --serviceCluster - istio-egressgateway - --zipkinAddress - zipkin:9411 - --statsdUdpAddress - istio-statsd-prom-bridge:9125 - --proxyAdminPort - "15000" - --controlPlaneAuthPolicy - NONE - --discoveryAddress - istio-pilot:8080 resources: {} env: - name: POD_NAME valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.name - name: POD_NAMESPACE valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.namespace - name: INSTANCE_IP valueFrom: fieldRef: fieldPath: status.podIP - name: ISTIO_META_POD_NAME valueFrom: fieldRef: fieldPath: metadata.name volumeMounts: - name: istio-certs mountPath: /etc/certs readOnly: true volumes: - name: istio-certs secret: secretName: "istio.default" optional: true affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - ppc64le - s390x preferredDuringSchedulingIgnoredDuringExecution: - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - ppc64le - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - s390x --- # Source: istio/charts/grafana/templates/deployment.yaml apiVersion: extensions/v1beta1 kind: Deployment metadata: name: grafana 
namespace: istio-system labels: app: grafana chart: grafana-0.1.0 release: RELEASE-NAME heritage: Tiller spec: replicas: 1 template: metadata: labels: app: grafana annotations: sidecar.istio.io/inject: "false" spec: containers: - name: grafana image: "docker.io/istio/grafana:0.8.0" imagePullPolicy: IfNotPresent ports: - containerPort: 3000 readinessProbe: httpGet: path: /login port: 3000 env: - name: GRAFANA_PORT value: "3000" - name: GF_AUTH_BASIC_ENABLED value: "false" - name: GF_AUTH_ANONYMOUS_ENABLED value: "true" - name: GF_AUTH_ANONYMOUS_ORG_ROLE value: Admin - name: GF_PATHS_DATA value: /data/grafana resources: {} volumeMounts: - name: data mountPath: /data/grafana affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - ppc64le - s390x preferredDuringSchedulingIgnoredDuringExecution: - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - ppc64le - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - s390x volumes: - name: data emptyDir: {} --- # Source: istio/charts/ingressgateway/templates/deployment.yaml apiVersion: extensions/v1beta1 kind: Deployment metadata: name: istio-ingressgateway namespace: istio-system labels: app: ingressgateway chart: ingressgateway-0.8.0 release: RELEASE-NAME heritage: Tiller istio: ingressgateway spec: replicas: template: metadata: labels: istio: ingressgateway annotations: sidecar.istio.io/inject: "false" spec: serviceAccountName: istio-ingressgateway-service-account containers: - name: ingressgateway image: "docker.io/istio/proxyv2:0.8.0" imagePullPolicy: IfNotPresent ports: - containerPort: 80 - containerPort: 443 - containerPort: 31400 args: - proxy - router - -v - "2" - --discoveryRefreshDelay - '1s' #discoveryRefreshDelay - --drainDuration - '45s' #drainDuration - --parentShutdownDuration - '1m0s' #parentShutdownDuration - --connectTimeout - '10s' #connectTimeout - --serviceCluster - istio-ingressgateway - --zipkinAddress - zipkin:9411 - --statsdUdpAddress - istio-statsd-prom-bridge:9125 - --proxyAdminPort - "15000" - --controlPlaneAuthPolicy - NONE - --discoveryAddress - istio-pilot:8080 resources: {} env: - name: POD_NAME valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.name - name: POD_NAMESPACE valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.namespace - name: INSTANCE_IP valueFrom: fieldRef: apiVersion: v1 fieldPath: status.podIP - name: ISTIO_META_POD_NAME valueFrom: fieldRef: fieldPath: metadata.name volumeMounts: - name: istio-certs mountPath: /etc/certs readOnly: true - name: ingressgateway-certs mountPath: "/etc/istio/ingressgateway-certs" readOnly: true volumes: - name: istio-certs secret: secretName: "istio.default" optional: true - name: ingressgateway-certs secret: secretName: "istio-ingressgateway-certs" optional: true affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - ppc64le - s390x preferredDuringSchedulingIgnoredDuringExecution: - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - ppc64le - weight: 2 preference: matchExpressions: - key: 
beta.kubernetes.io/arch operator: In values: - s390x --- # Source: istio/charts/mixer/templates/deployment.yaml apiVersion: extensions/v1beta1 kind: Deployment metadata: name: istio-policy namespace: istio-system labels: chart: mixer-0.8.0 release: RELEASE-NAME istio: mixer spec: replicas: 1 template: metadata: labels: istio: mixer istio-mixer-type: policy annotations: sidecar.istio.io/inject: "false" spec: serviceAccountName: istio-mixer-service-account volumes: - name: istio-certs secret: secretName: istio.istio-mixer-service-account optional: true affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - ppc64le - s390x preferredDuringSchedulingIgnoredDuringExecution: - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - ppc64le - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - s390x containers: - name: mixer image: "docker.io/istio/mixer:0.8.0" imagePullPolicy: IfNotPresent ports: - containerPort: 9092 - containerPort: 9093 - containerPort: 42422 args: - --address - tcp://127.0.0.1:9092 - --configStoreURL=k8s:// - --configDefaultNamespace=istio-system - --trace_zipkin_url=http://zipkin:9411/api/v1/spans resources: {} - name: istio-proxy image: "docker.io/istio/proxyv2:0.8.0" imagePullPolicy: IfNotPresent ports: - containerPort: 9091 - containerPort: 15004 args: - proxy - --serviceCluster - istio-policy - --templateFile - /etc/istio/proxy/envoy_policy.yaml.tmpl - --controlPlaneAuthPolicy - NONE env: - name: POD_NAME valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.name - name: POD_NAMESPACE valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.namespace - name: INSTANCE_IP valueFrom: fieldRef: apiVersion: v1 fieldPath: status.podIP resources: requests: cpu: 100m memory: 128Mi volumeMounts: - name: istio-certs mountPath: /etc/certs readOnly: true --- apiVersion: extensions/v1beta1 kind: Deployment metadata: name: istio-telemetry namespace: istio-system labels: chart: mixer-0.8.0 release: RELEASE-NAME istio: mixer spec: replicas: 1 template: metadata: labels: istio: mixer istio-mixer-type: telemetry annotations: sidecar.istio.io/inject: "false" spec: serviceAccountName: istio-mixer-service-account volumes: - name: istio-certs secret: secretName: istio.istio-mixer-service-account optional: true containers: - name: mixer image: "docker.io/istio/mixer:0.8.0" imagePullPolicy: IfNotPresent ports: - containerPort: 9092 - containerPort: 9093 - containerPort: 42422 args: - --address - tcp://127.0.0.1:9092 - --configStoreURL=k8s:// - --configDefaultNamespace=istio-system - --trace_zipkin_url=http://zipkin:9411/api/v1/spans resources: {} - name: istio-proxy image: "docker.io/istio/proxyv2:0.8.0" imagePullPolicy: IfNotPresent ports: - containerPort: 9091 - containerPort: 15004 args: - proxy - --serviceCluster - istio-telemetry - --templateFile - /etc/istio/proxy/envoy_telemetry.yaml.tmpl - --controlPlaneAuthPolicy - NONE env: - name: POD_NAME valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.name - name: POD_NAMESPACE valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.namespace - name: INSTANCE_IP valueFrom: fieldRef: apiVersion: v1 fieldPath: status.podIP resources: requests: cpu: 100m memory: 128Mi volumeMounts: - name: istio-certs mountPath: /etc/certs readOnly: true --- --- # 
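# (For reference: the istio-policy and istio-telemetry Deployments above back
# the Services of the same names defined earlier in this file. Sidecars reach
# Mixer through the istio-proxy container on the mTLS port 15004 -- the
# mixerCheckServer / mixerReportServer addresses in the istio ConfigMap --
# while Prometheus scrapes telemetry on the dedicated "prometheus" port 42422.)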
Source: istio/charts/pilot/templates/deployment.yaml apiVersion: extensions/v1beta1 kind: Deployment metadata: name: istio-pilot namespace: istio-system # TODO: default tempate doesn't have this, which one is right ? labels: app: istio-pilot chart: pilot-0.8.0 release: RELEASE-NAME heritage: Tiller istio: pilot annotations: checksum/config-volume: f8da08b6b8c170dde721efd680270b2901e750d4aa186ebb6c22bef5b78a43f9 spec: replicas: 1 template: metadata: labels: istio: pilot annotations: sidecar.istio.io/inject: "false" spec: serviceAccountName: istio-pilot-service-account containers: - name: discovery image: "docker.io/istio/pilot:0.8.0" imagePullPolicy: IfNotPresent args: - "discovery" # TODO(sdake) remove when secrets are automagically registered ports: - containerPort: 8080 - containerPort: 15010 readinessProbe: httpGet: path: /v1/registration port: 8080 initialDelaySeconds: 30 periodSeconds: 30 timeoutSeconds: 5 env: - name: POD_NAME valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.name - name: POD_NAMESPACE valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.namespace - name: PILOT_THROTTLE value: "500" - name: PILOT_CACHE_SQUASH value: "5" resources: {} volumeMounts: - name: config-volume mountPath: /etc/istio/config - name: istio-certs mountPath: /etc/certs readOnly: true - name: istio-proxy image: "docker.io/istio/proxyv2:0.8.0" imagePullPolicy: IfNotPresent ports: - containerPort: 15003 - containerPort: 15005 - containerPort: 15007 - containerPort: 15011 args: - proxy - --serviceCluster - istio-pilot - --templateFile - /etc/istio/proxy/envoy_pilot.yaml.tmpl - --controlPlaneAuthPolicy - NONE env: - name: POD_NAME valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.name - name: POD_NAMESPACE valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.namespace - name: INSTANCE_IP valueFrom: fieldRef: apiVersion: v1 fieldPath: status.podIP resources: requests: cpu: 100m memory: 128Mi volumeMounts: - name: istio-certs mountPath: /etc/certs readOnly: true volumes: - name: config-volume configMap: name: istio - name: istio-certs secret: secretName: "istio.istio-pilot-service-account" affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - ppc64le - s390x preferredDuringSchedulingIgnoredDuringExecution: - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - ppc64le - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - s390x --- # Source: istio/charts/prometheus/templates/deployment.yaml # TODO: the original template has service account, roles, etc apiVersion: extensions/v1beta1 kind: Deployment metadata: name: prometheus namespace: istio-system labels: app: prometheus chart: prometheus-0.1.0 release: RELEASE-NAME heritage: Tiller spec: replicas: 1 selector: matchLabels: app: prometheus template: metadata: labels: app: prometheus annotations: sidecar.istio.io/inject: "false" spec: serviceAccountName: prometheus containers: - name: prometheus image: "docker.io/prom/prometheus:latest" imagePullPolicy: IfNotPresent args: - '--storage.tsdb.retention=6h' - '--config.file=/etc/prometheus/prometheus.yml' ports: - containerPort: 9090 name: http livenessProbe: httpGet: path: /-/healthy port: 9090 readinessProbe: httpGet: path: /-/ready port: 9090 resources: {} volumeMounts: - name: config-volume 
mountPath: /etc/prometheus volumes: - name: config-volume configMap: name: prometheus affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - ppc64le - s390x preferredDuringSchedulingIgnoredDuringExecution: - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - ppc64le - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - s390x --- # Source: istio/charts/security/templates/deployment.yaml # istio CA watching all namespaces apiVersion: extensions/v1beta1 kind: Deployment metadata: name: istio-citadel namespace: istio-system labels: app: security chart: security-0.8.0 release: RELEASE-NAME heritage: Tiller istio: citadel spec: replicas: 1 template: metadata: labels: istio: citadel annotations: sidecar.istio.io/inject: "false" spec: serviceAccountName: istio-citadel-service-account containers: - name: citadel image: "docker.io/istio/citadel:0.8.0" imagePullPolicy: IfNotPresent args: - --append-dns-names=true - --grpc-port=8060 - --grpc-hostname=citadel - --self-signed-ca=true - --citadel-storage-namespace=istio-system resources: {} affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - ppc64le - s390x preferredDuringSchedulingIgnoredDuringExecution: - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - ppc64le - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - s390x --- # Source: istio/charts/servicegraph/templates/deployment.yaml apiVersion: extensions/v1beta1 kind: Deployment metadata: name: servicegraph namespace: istio-system labels: app: servicegraph chart: servicegraph-0.1.0 release: RELEASE-NAME heritage: Tiller spec: replicas: 1 template: metadata: labels: app: servicegraph annotations: sidecar.istio.io/inject: "false" spec: containers: - name: servicegraph image: "docker.io/istio/servicegraph:0.8.0" imagePullPolicy: IfNotPresent ports: - containerPort: 8088 args: - --prometheusAddr=http://prometheus:9090 livenessProbe: httpGet: path: /graph port: 8088 readinessProbe: httpGet: path: /graph port: 8088 resources: {} affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - ppc64le - s390x preferredDuringSchedulingIgnoredDuringExecution: - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - ppc64le - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - s390x --- # Source: istio/charts/sidecarInjectorWebhook/templates/deployment.yaml apiVersion: extensions/v1beta1 kind: Deployment metadata: name: istio-sidecar-injector namespace: istio-system labels: app: sidecarInjectorWebhook chart: sidecarInjectorWebhook-0.8.0 release: RELEASE-NAME heritage: Tiller istio: sidecar-injector spec: replicas: template: metadata: labels: istio: sidecar-injector spec: serviceAccountName: 
istio-sidecar-injector-service-account containers: - name: sidecar-injector-webhook image: "docker.io/istio/sidecar_injector:0.8.0" imagePullPolicy: IfNotPresent args: - --caCertFile=/etc/istio/certs/root-cert.pem - --tlsCertFile=/etc/istio/certs/cert-chain.pem - --tlsKeyFile=/etc/istio/certs/key.pem - --injectConfig=/etc/istio/inject/config - --meshConfig=/etc/istio/config/mesh - --healthCheckInterval=2s - --healthCheckFile=/health volumeMounts: - name: config-volume mountPath: /etc/istio/config readOnly: true - name: certs mountPath: /etc/istio/certs readOnly: true - name: inject-config mountPath: /etc/istio/inject readOnly: true livenessProbe: exec: command: - /usr/local/bin/sidecar-injector - probe - --probe-path=/health - --interval=2s initialDelaySeconds: 4 periodSeconds: 4 readinessProbe: exec: command: - /usr/local/bin/sidecar-injector - probe - --probe-path=/health - --interval=2s initialDelaySeconds: 4 periodSeconds: 4 volumes: - name: config-volume configMap: name: istio - name: certs secret: secretName: istio.istio-sidecar-injector-service-account - name: inject-config configMap: name: istio-sidecar-injector items: - key: config path: config affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - ppc64le - s390x preferredDuringSchedulingIgnoredDuringExecution: - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - ppc64le - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - s390x --- # Source: istio/charts/tracing/templates/deployment.yaml apiVersion: extensions/v1beta1 kind: Deployment metadata: name: istio-tracing namespace: istio-system labels: app: istio-tracing chart: tracing-0.1.0 release: RELEASE-NAME heritage: Tiller spec: replicas: 1 template: metadata: labels: app: jaeger annotations: sidecar.istio.io/inject: "false" spec: containers: - name: jaeger image: "jaegertracing/all-in-one:1.5" imagePullPolicy: IfNotPresent ports: - containerPort: 9411 - containerPort: 16686 - containerPort: 5775 protocol: UDP - containerPort: 6831 protocol: UDP - containerPort: 6832 protocol: UDP env: - name: POD_NAMESPACE valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.namespace - name: COLLECTOR_ZIPKIN_HTTP_PORT value: "9411" - name: MEMORY_MAX_TRACES value: "50000" livenessProbe: httpGet: path: / port: 16686 readinessProbe: httpGet: path: / port: 16686 resources: {} affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - ppc64le - s390x preferredDuringSchedulingIgnoredDuringExecution: - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - ppc64le - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - s390x --- # Source: istio/charts/security/templates/cleanup-old-ca.yaml apiVersion: batch/v1 kind: Job metadata: name: istio-cleanup-old-ca namespace: istio-system annotations: "helm.sh/hook": post-install "helm.sh/hook-delete-policy": hook-succeeded labels: app: security chart: security-0.8.0 release: RELEASE-NAME heritage: Tiller spec: template: metadata: name: istio-cleanup-old-ca labels: 
        app: security
        release: RELEASE-NAME
    spec:
      serviceAccountName: istio-cleanup-old-ca-service-account
      containers:
      - name: hyperkube
        image: "quay.io/coreos/hyperkube:v1.7.6_coreos.0"
        command:
        - /bin/bash
        - -c
        - >
          NS="-n istio-system";
          ./kubectl get deploy istio-ca $NS;
          if [[ $? = 0 ]]; then ./kubectl delete deploy istio-ca $NS; fi;
          ./kubectl get serviceaccount istio-ca-service-account $NS;
          if [[ $? = 0 ]]; then ./kubectl delete serviceaccount istio-ca-service-account $NS; fi;
          ./kubectl get service istio-ca-ilb $NS;
          if [[ $? = 0 ]]; then ./kubectl delete service istio-ca-ilb $NS; fi
      restartPolicy: Never
---
# Source: istio/charts/egressgateway/templates/autoscale.yaml
apiVersion: autoscaling/v2beta1
kind: HorizontalPodAutoscaler
metadata:
  name: istio-egressgateway
  namespace: istio-system
spec:
  maxReplicas: 1
  minReplicas: 1
  scaleTargetRef:
    apiVersion: apps/v1beta1
    kind: Deployment
    name: istio-egressgateway
  metrics:
  - type: Resource
    resource:
      name: cpu
      targetAverageUtilization: 80
---
# Source: istio/charts/ingressgateway/templates/autoscale.yaml
apiVersion: autoscaling/v2beta1
kind: HorizontalPodAutoscaler
metadata:
  name: istio-ingressgateway
  namespace: istio-system
spec:
  maxReplicas: 1
  minReplicas: 1
  scaleTargetRef:
    apiVersion: apps/v1beta1
    kind: Deployment
    name: istio-ingressgateway
  metrics:
  - type: Resource
    resource:
      name: cpu
      targetAverageUtilization: 80
---
# Source: istio/charts/tracing/templates/service.yaml
apiVersion: v1
kind: List
items:
- apiVersion: v1
  kind: Service
  metadata:
    name: zipkin
    namespace: istio-system
    labels:
      app: jaeger
      chart: tracing-0.1.0
      release: RELEASE-NAME
      heritage: Tiller
  spec:
    type: ClusterIP
    ports:
    - port: 9411
      targetPort: 9411
      protocol: TCP
      name: http
    selector:
      app: jaeger
- apiVersion: v1
  kind: Service
  metadata:
    name: tracing
    namespace: istio-system
    labels:
      app: jaeger
      chart: tracing-0.1.0
      release: RELEASE-NAME
      heritage: Tiller
  spec:
    ports:
    - name: query-http
      port: 80
      protocol: TCP
      targetPort: 16686
    selector:
      app: jaeger
    type: LoadBalancer
---
# Source: istio/charts/sidecarInjectorWebhook/templates/mutatingwebhook.yaml
apiVersion: admissionregistration.k8s.io/v1beta1
kind: MutatingWebhookConfiguration
metadata:
  name: istio-sidecar-injector
  namespace: istio-system
  labels:
    app: istio-sidecar-injector
    chart: sidecarInjectorWebhook-0.8.0
    release: RELEASE-NAME
    heritage: Tiller
webhooks:
- name: sidecar-injector.istio.io
  clientConfig:
    service:
      name: istio-sidecar-injector
      namespace: istio-system
      path: "/inject"
    caBundle: ""
  rules:
  - operations: [ "CREATE" ]
    apiGroups: [""]
    apiVersions: ["v1"]
    resources: ["pods"]
  failurePolicy: Fail
  namespaceSelector:
    matchLabels:
      istio-injection: enabled
---
# Source: istio/charts/grafana/templates/ingress.yaml
---
# Source: istio/charts/mixer/templates/config.yaml
---
# Source: istio/charts/prometheus/templates/ingress.yaml
---
# Source: istio/charts/servicegraph/templates/ingress.yaml
---
# Source: istio/charts/tracing/templates/ingress.yaml
---
# Source: istio/charts/tracing/templates/service-jaeger.yaml

================================================
FILE: examples/90_Kubernetes/istio/rendered/istio-v1.0-minikube.yml
================================================
apiVersion: v1
kind: Namespace
metadata:
  name: istio-system
---
# Source: istio/charts/galley/templates/configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: istio-galley-configuration
  namespace: istio-system
  labels:
    app: istio-galley
    chart: galley-1.0.0
    release: istio
    heritage: Tiller
    istio: mixer
data:
  validatingwebhookconfiguration.yaml: |-
    apiVersion:
admissionregistration.k8s.io/v1beta1 kind: ValidatingWebhookConfiguration metadata: name: istio-galley namespace: istio-system labels: app: istio-galley chart: galley-1.0.0 release: istio heritage: Tiller webhooks: - name: pilot.validation.istio.io clientConfig: service: name: istio-galley namespace: istio-system path: "/admitpilot" caBundle: "" rules: - operations: - CREATE - UPDATE apiGroups: - config.istio.io apiVersions: - v1alpha2 resources: - httpapispecs - httpapispecbindings - quotaspecs - quotaspecbindings - operations: - CREATE - UPDATE apiGroups: - rbac.istio.io apiVersions: - "*" resources: - "*" - operations: - CREATE - UPDATE apiGroups: - authentication.istio.io apiVersions: - "*" resources: - "*" - operations: - CREATE - UPDATE apiGroups: - networking.istio.io apiVersions: - "*" resources: - destinationrules - envoyfilters - gateways # disabled per @costinm's request # - serviceentries - virtualservices failurePolicy: Fail - name: mixer.validation.istio.io clientConfig: service: name: istio-galley namespace: istio-system path: "/admitmixer" caBundle: "" rules: - operations: - CREATE - UPDATE apiGroups: - config.istio.io apiVersions: - v1alpha2 resources: - rules - attributemanifests - circonuses - deniers - fluentds - kubernetesenvs - listcheckers - memquotas - noops - opas - prometheuses - rbacs - servicecontrols - solarwindses - stackdrivers - statsds - stdios - apikeys - authorizations - checknothings # - kuberneteses - listentries - logentries - metrics - quotas - reportnothings - servicecontrolreports - tracespans failurePolicy: Fail --- # Source: istio/charts/mixer/templates/configmap.yaml apiVersion: v1 kind: ConfigMap metadata: name: istio-statsd-prom-bridge namespace: istio-system labels: app: istio-statsd-prom-bridge chart: mixer-1.0.0 release: istio heritage: Tiller istio: mixer data: mapping.conf: |- --- # Source: istio/charts/prometheus/templates/configmap.yaml apiVersion: v1 kind: ConfigMap metadata: name: prometheus namespace: istio-system labels: app: prometheus chart: prometheus-0.1.0 release: istio heritage: Tiller data: prometheus.yml: |- global: scrape_interval: 15s scrape_configs: - job_name: 'istio-mesh' # Override the global default and scrape targets from this job every 5 seconds. scrape_interval: 5s kubernetes_sd_configs: - role: endpoints namespaces: names: - istio-system relabel_configs: - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] action: keep regex: istio-telemetry;prometheus - job_name: 'envoy' # Override the global default and scrape targets from this job every 5 seconds. scrape_interval: 5s # metrics_path defaults to '/metrics' # scheme defaults to 'http'. kubernetes_sd_configs: - role: endpoints namespaces: names: - istio-system relabel_configs: - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] action: keep regex: istio-statsd-prom-bridge;statsd-prom - job_name: 'istio-policy' # Override the global default and scrape targets from this job every 5 seconds. scrape_interval: 5s # metrics_path defaults to '/metrics' # scheme defaults to 'http'. kubernetes_sd_configs: - role: endpoints namespaces: names: - istio-system relabel_configs: - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] action: keep regex: istio-policy;http-monitoring - job_name: 'istio-telemetry' # Override the global default and scrape targets from this job every 5 seconds. scrape_interval: 5s # metrics_path defaults to '/metrics' # scheme defaults to 'http'. 
kubernetes_sd_configs: - role: endpoints namespaces: names: - istio-system relabel_configs: - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] action: keep regex: istio-telemetry;http-monitoring - job_name: 'pilot' # Override the global default and scrape targets from this job every 5 seconds. scrape_interval: 5s # metrics_path defaults to '/metrics' # scheme defaults to 'http'. kubernetes_sd_configs: - role: endpoints namespaces: names: - istio-system relabel_configs: - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] action: keep regex: istio-pilot;http-monitoring - job_name: 'galley' # Override the global default and scrape targets from this job every 5 seconds. scrape_interval: 5s # metrics_path defaults to '/metrics' # scheme defaults to 'http'. kubernetes_sd_configs: - role: endpoints namespaces: names: - istio-system relabel_configs: - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] action: keep regex: istio-galley;http-monitoring # scrape config for API servers - job_name: 'kubernetes-apiservers' kubernetes_sd_configs: - role: endpoints namespaces: names: - default scheme: https tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token relabel_configs: - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] action: keep regex: kubernetes;https # scrape config for nodes (kubelet) - job_name: 'kubernetes-nodes' scheme: https tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token kubernetes_sd_configs: - role: node relabel_configs: - action: labelmap regex: __meta_kubernetes_node_label_(.+) - target_label: __address__ replacement: kubernetes.default.svc:443 - source_labels: [__meta_kubernetes_node_name] regex: (.+) target_label: __metrics_path__ replacement: /api/v1/nodes/${1}/proxy/metrics # Scrape config for Kubelet cAdvisor. # # This is required for Kubernetes 1.7.3 and later, where cAdvisor metrics # (those whose names begin with 'container_') have been removed from the # Kubelet metrics endpoint. This job scrapes the cAdvisor endpoint to # retrieve those metrics. # # In Kubernetes 1.7.0-1.7.2, these metrics are only exposed on the cAdvisor # HTTP endpoint; use "replacement: /api/v1/nodes/${1}:4194/proxy/metrics" # in that case (and ensure cAdvisor's HTTP server hasn't been disabled with # the --cadvisor-port=0 Kubelet flag). # # This job is not necessary and should be removed in Kubernetes 1.6 and # earlier versions, or it will cause the metrics to be scraped twice. - job_name: 'kubernetes-cadvisor' scheme: https tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token kubernetes_sd_configs: - role: node relabel_configs: - action: labelmap regex: __meta_kubernetes_node_label_(.+) - target_label: __address__ replacement: kubernetes.default.svc:443 - source_labels: [__meta_kubernetes_node_name] regex: (.+) target_label: __metrics_path__ replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor # scrape config for service endpoints. 
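    # The two jobs below discover targets through the Kubernetes API and keep
    # only those that opt in via annotations; the relabel rules then rewrite
    # __scheme__, __metrics_path__, and __address__ from the annotation values.
    # A minimal, hypothetical opt-in on a Service or Pod looks like:
    #
    #   metadata:
    #     annotations:
    #       prometheus.io/scrape: "true"    # required by the 'keep' rule
    #       prometheus.io/port: "9102"      # example port; rewrites __address__
    #       prometheus.io/path: "/metrics"  # optional; default is /metrics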
    - job_name: 'kubernetes-service-endpoints'
      kubernetes_sd_configs:
      - role: endpoints
      relabel_configs:
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
        action: keep
        regex: true
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
        action: replace
        target_label: __scheme__
        regex: (https?)
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
        action: replace
        target_label: __metrics_path__
        regex: (.+)
      - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
        action: replace
        target_label: __address__
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
      - action: labelmap
        regex: __meta_kubernetes_service_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_service_name]
        action: replace
        target_label: kubernetes_name
    # Example scrape config for pods
    - job_name: 'kubernetes-pods'
      kubernetes_sd_configs:
      - role: pod
      relabel_configs:
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
        action: keep
        regex: true
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
        action: replace
        target_label: __metrics_path__
        regex: (.+)
      - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
        action: replace
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
        target_label: __address__
      - action: labelmap
        regex: __meta_kubernetes_pod_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: namespace
      - source_labels: [__meta_kubernetes_pod_name]
        action: replace
        target_label: pod_name
---
# Source: istio/charts/security/templates/configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: istio-security-custom-resources
  namespace: istio-system
  labels:
    app: istio-security
    chart: security-1.0.0
    release: istio
    heritage: Tiller
    istio: security
data:
  custom-resources.yaml: |-
  run.sh: |-
    #!/bin/sh
    set -x
    if [ "$#" -ne "1" ]; then
        echo "first argument should be path to custom resource yaml"
        exit 1
    fi
    pathToResourceYAML=${1}
    /kubectl get validatingwebhookconfiguration istio-galley 2>/dev/null
    if [ "$?" -eq 0 ]; then
        echo "istio-galley validatingwebhookconfiguration found - waiting for istio-galley deployment to be ready"
        while true; do
            /kubectl -n istio-system get deployment istio-galley 2>/dev/null
            if [ "$?" -eq 0 ]; then
                break
            fi
            sleep 1
        done
        /kubectl -n istio-system rollout status deployment istio-galley
        if [ "$?" -ne 0 ]; then
            echo "istio-galley deployment rollout status check failed"
            exit 1
        fi
        echo "istio-galley deployment ready for configuration validation"
    fi
    sleep 5
    /kubectl apply -f ${pathToResourceYAML}
---
# Source: istio/templates/configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: istio
  namespace: istio-system
  labels:
    app: istio
    chart: istio-1.0.0
    release: istio
    heritage: Tiller
data:
  mesh: |-
    # Set the following variable to true to disable policy checks by the Mixer.
    # Note that metrics will still be reported to the Mixer.
    disablePolicyChecks: false
    # Set enableTracing to false to disable request tracing.
    enableTracing: true
    # Set accessLogFile to empty string to disable access log.
    accessLogFile: "/dev/stdout"
    #
    # Deprecated: mixer is using EDS
    mixerCheckServer: istio-policy.istio-system.svc.cluster.local:9091
    mixerReportServer: istio-telemetry.istio-system.svc.cluster.local:9091
    # Unix Domain Socket through which envoy communicates with NodeAgent SDS to get
    # key/cert for mTLS.
    # Use secret-mount files instead of SDS if set to empty.
    sdsUdsPath: ""
    # How frequently should Envoy fetch key/cert from NodeAgent.
    sdsRefreshDelay: 15s
    #
    defaultConfig:
      #
      # TCP connection timeout between Envoy & the application, and between Envoys.
      connectTimeout: 10s
      #
      ### ADVANCED SETTINGS #############
      # Where should envoy's configuration be stored in the istio-proxy container
      configPath: "/etc/istio/proxy"
      binaryPath: "/usr/local/bin/envoy"
      # The pseudo service name used for Envoy.
      serviceCluster: istio-proxy
      # These settings that determine how long an old Envoy
      # process should be kept alive after an occasional reload.
      drainDuration: 45s
      parentShutdownDuration: 1m0s
      #
      # The mode used to redirect inbound connections to Envoy. This setting
      # has no effect on outbound traffic: iptables REDIRECT is always used for
      # outbound connections.
      # If "REDIRECT", use iptables REDIRECT to NAT and redirect to Envoy.
      # The "REDIRECT" mode loses source addresses during redirection.
      # If "TPROXY", use iptables TPROXY to redirect to Envoy.
      # The "TPROXY" mode preserves both the source and destination IP
      # addresses and ports, so that they can be used for advanced filtering
      # and manipulation.
      # The "TPROXY" mode also configures the sidecar to run with the
      # CAP_NET_ADMIN capability, which is required to use TPROXY.
      #interceptionMode: REDIRECT
      #
      # Port where Envoy listens (on local host) for admin commands
      # You can exec into the istio-proxy container in a pod and
      # curl the admin port (curl http://localhost:15000/) to obtain
      # diagnostic information from Envoy. See
      # https://lyft.github.io/envoy/docs/operations/admin.html
      # for more details
      proxyAdminPort: 15000
      #
      # Zipkin trace collector
      zipkinAddress: zipkin.istio-system:9411
      #
      # Statsd metrics collector converts statsd metrics into Prometheus metrics.
      statsdUdpAddress: istio-statsd-prom-bridge.istio-system:9125
      #
      # Mutual TLS authentication between sidecars and istio control plane.
      controlPlaneAuthPolicy: NONE
      #
      # Address where istio Pilot service is running
      discoveryAddress: istio-pilot.istio-system:15007
---
# Source: istio/templates/sidecar-injector-configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: istio-sidecar-injector
  namespace: istio-system
  labels:
    app: istio
    chart: istio-1.0.0
    release: istio
    heritage: Tiller
    istio: sidecar-injector
data:
  config: |-
    policy: enabled
    template: |-
      initContainers:
      - name: istio-init
        image: "docker.io/istio/proxy_init:1.0.0"
        args:
        - "-p"
        - [[ .MeshConfig.ProxyListenPort ]]
        - "-u"
        - 1337
        - "-m"
        - [[ or (index .ObjectMeta.Annotations "sidecar.istio.io/interceptionMode") .ProxyConfig.InterceptionMode.String ]]
        - "-i"
        [[ if (isset .ObjectMeta.Annotations "traffic.sidecar.istio.io/includeOutboundIPRanges") -]]
        - "[[ index .ObjectMeta.Annotations "traffic.sidecar.istio.io/includeOutboundIPRanges" ]]"
        [[ else -]]
        - "*"
        [[ end -]]
        - "-x"
        [[ if (isset .ObjectMeta.Annotations "traffic.sidecar.istio.io/excludeOutboundIPRanges") -]]
        - "[[ index .ObjectMeta.Annotations "traffic.sidecar.istio.io/excludeOutboundIPRanges" ]]"
        [[ else -]]
        - ""
        [[ end -]]
        - "-b"
        [[ if (isset .ObjectMeta.Annotations "traffic.sidecar.istio.io/includeInboundPorts") -]]
        - "[[ index .ObjectMeta.Annotations "traffic.sidecar.istio.io/includeInboundPorts" ]]"
        [[ else -]]
        - [[ range .Spec.Containers -]][[ range .Ports -]][[ .ContainerPort -]], [[ end -]][[ end -]][[ end]]
        - "-d"
        [[ if (isset .ObjectMeta.Annotations "traffic.sidecar.istio.io/excludeInboundPorts") -]]
        - "[[ index .ObjectMeta.Annotations "traffic.sidecar.istio.io/excludeInboundPorts" ]]"
        [[ else -]]
        - ""
        [[ end -]]
        imagePullPolicy: IfNotPresent
        securityContext:
          capabilities:
            add:
            - NET_ADMIN
          privileged: true
        restartPolicy: Always
      containers:
      - name: istio-proxy
        image: [[ if (isset .ObjectMeta.Annotations "sidecar.istio.io/proxyImage") -]]
        "[[ index .ObjectMeta.Annotations "sidecar.istio.io/proxyImage" ]]"
        [[ else -]]
        docker.io/istio/proxyv2:1.0.0
        [[ end -]]
        args:
        - proxy
        - sidecar
        - --configPath
        - [[ .ProxyConfig.ConfigPath ]]
        - --binaryPath
        - [[ .ProxyConfig.BinaryPath ]]
        - --serviceCluster
        [[ if ne "" (index .ObjectMeta.Labels "app") -]]
        - [[ index .ObjectMeta.Labels "app" ]]
        [[ else -]]
        - "istio-proxy"
        [[ end -]]
        - --drainDuration
        - [[ formatDuration .ProxyConfig.DrainDuration ]]
        - --parentShutdownDuration
        - [[ formatDuration .ProxyConfig.ParentShutdownDuration ]]
        - --discoveryAddress
        - [[ .ProxyConfig.DiscoveryAddress ]]
        - --discoveryRefreshDelay
        - [[ formatDuration .ProxyConfig.DiscoveryRefreshDelay ]]
        - --zipkinAddress
        - [[ .ProxyConfig.ZipkinAddress ]]
        - --connectTimeout
        - [[ formatDuration .ProxyConfig.ConnectTimeout ]]
        - --statsdUdpAddress
        - [[ .ProxyConfig.StatsdUdpAddress ]]
        - --proxyAdminPort
        - [[ .ProxyConfig.ProxyAdminPort ]]
        - --controlPlaneAuthPolicy
        - [[ or (index .ObjectMeta.Annotations "sidecar.istio.io/controlPlaneAuthPolicy") .ProxyConfig.ControlPlaneAuthPolicy ]]
        env:
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        - name: INSTANCE_IP
          valueFrom:
            fieldRef:
              fieldPath: status.podIP
        - name: ISTIO_META_POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: ISTIO_META_INTERCEPTION_MODE
          value: [[ or (index .ObjectMeta.Annotations "sidecar.istio.io/interceptionMode") .ProxyConfig.InterceptionMode.String ]]
        imagePullPolicy: IfNotPresent
        securityContext:
          privileged: false
          readOnlyRootFilesystem: true
        [[ if eq (or (index .ObjectMeta.Annotations "sidecar.istio.io/interceptionMode") .ProxyConfig.InterceptionMode.String) "TPROXY" -]]
          capabilities:
            add:
            - NET_ADMIN
          runAsGroup: 1337
        [[ else -]]
          runAsUser: 1337
        [[ end -]]
        restartPolicy: Always
        resources:
          [[ if (isset .ObjectMeta.Annotations "sidecar.istio.io/proxyCPU") -]]
          requests:
            cpu: "[[ index .ObjectMeta.Annotations "sidecar.istio.io/proxyCPU" ]]"
            memory: "[[ index .ObjectMeta.Annotations "sidecar.istio.io/proxyMemory" ]]"
          [[ else -]]
          requests:
            cpu: 10m
          [[ end -]]
        volumeMounts:
        - mountPath: /etc/istio/proxy
          name: istio-envoy
        - mountPath: /etc/certs/
          name: istio-certs
          readOnly: true
      volumes:
      - emptyDir:
          medium: Memory
        name: istio-envoy
      - name: istio-certs
        secret:
          optional: true
          [[ if eq .Spec.ServiceAccountName "" -]]
          secretName: istio.default
          [[ else -]]
          secretName: [[ printf "istio.%s" .Spec.ServiceAccountName ]]
          [[ end -]]
---
# Source: istio/charts/galley/templates/serviceaccount.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: istio-galley-service-account
  namespace: istio-system
  labels:
    app: istio-galley
    chart: galley-1.0.0
    heritage: Tiller
    release: istio
---
# Source: istio/charts/gateways/templates/serviceaccount.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: istio-egressgateway-service-account
  namespace: istio-system
  labels:
    app: egressgateway
    chart: gateways-1.0.0
    heritage: Tiller
    release: istio
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: istio-ingressgateway-service-account
  namespace: istio-system
  labels:
    app: ingressgateway
    chart: gateways-1.0.0
    heritage: Tiller
    release: istio
---
---
# Source: istio/charts/mixer/templates/serviceaccount.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: istio-mixer-service-account
  namespace: istio-system
  labels:
    app: mixer
    chart: mixer-1.0.0
    heritage: Tiller
    release: istio
---
# Source: istio/charts/pilot/templates/serviceaccount.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: istio-pilot-service-account
  namespace: istio-system
  labels:
    app: istio-pilot
    chart: pilot-1.0.0
    heritage: Tiller
    release: istio
---
# Source: istio/charts/prometheus/templates/serviceaccount.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: istio-system
---
# Source: istio/charts/security/templates/cleanup-secrets.yaml
# The reason for creating a ServiceAccount and ClusterRole specifically for this
# post-delete hooked job is because the citadel ServiceAccount is being deleted
# before this hook is launched. On the other hand, running this hook before the
# deletion of the citadel (e.g. pre-delete) won't delete the secrets because they
# will be re-created immediately by the to-be-deleted citadel.
#
# It's also important that the ServiceAccount, ClusterRole and ClusterRoleBinding
# will be ready before running the hooked Job therefore the hook weights.
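# Editor's note (not part of the upstream chart): concretely, the weights below
# order the post-delete hooks as ServiceAccount and ClusterRole ("helm.sh/hook-weight": "1"),
# then the ClusterRoleBinding ("2"), then the cleanup Job ("3"), so the Job's
# RBAC is in place before it runs.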
apiVersion: v1
kind: ServiceAccount
metadata:
  name: istio-cleanup-secrets-service-account
  namespace: istio-system
  annotations:
    "helm.sh/hook": post-delete
    "helm.sh/hook-delete-policy": hook-succeeded
    "helm.sh/hook-weight": "1"
  labels:
    app: security
    chart: security-1.0.0
    heritage: Tiller
    release: istio
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: istio-cleanup-secrets-istio-system
  annotations:
    "helm.sh/hook": post-delete
    "helm.sh/hook-delete-policy": hook-succeeded
    "helm.sh/hook-weight": "1"
  labels:
    app: security
    chart: security-1.0.0
    heritage: Tiller
    release: istio
rules:
- apiGroups: [""]
  resources: ["secrets"]
  verbs: ["list", "delete"]
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: istio-cleanup-secrets-istio-system
  annotations:
    "helm.sh/hook": post-delete
    "helm.sh/hook-delete-policy": hook-succeeded
    "helm.sh/hook-weight": "2"
  labels:
    app: security
    chart: security-1.0.0
    heritage: Tiller
    release: istio
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: istio-cleanup-secrets-istio-system
subjects:
- kind: ServiceAccount
  name: istio-cleanup-secrets-service-account
  namespace: istio-system
---
apiVersion: batch/v1
kind: Job
metadata:
  name: istio-cleanup-secrets
  namespace: istio-system
  annotations:
    "helm.sh/hook": post-delete
    "helm.sh/hook-delete-policy": hook-succeeded
    "helm.sh/hook-weight": "3"
  labels:
    app: security
    chart: security-1.0.0
    release: istio
    heritage: Tiller
spec:
  template:
    metadata:
      name: istio-cleanup-secrets
      labels:
        app: security
        release: istio
    spec:
      serviceAccountName: istio-cleanup-secrets-service-account
      containers:
      - name: hyperkube
        image: "quay.io/coreos/hyperkube:v1.7.6_coreos.0"
        command:
        - /bin/bash
        - -c
        - >
          kubectl get secret --all-namespaces | grep "istio.io/key-and-cert" | while read -r entry; do
          ns=$(echo $entry | awk '{print $1}');
          name=$(echo $entry | awk '{print $2}');
          kubectl delete secret $name -n $ns;
          done
      restartPolicy: OnFailure
---
# Source: istio/charts/security/templates/serviceaccount.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: istio-citadel-service-account
  namespace: istio-system
  labels:
    app: security
    chart: security-1.0.0
    heritage: Tiller
    release: istio
---
# Source: istio/charts/sidecarInjectorWebhook/templates/serviceaccount.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: istio-sidecar-injector-service-account
  namespace: istio-system
  labels:
    app: istio-sidecar-injector
    chart: sidecarInjectorWebhook-1.0.0
    heritage: Tiller
    release: istio
---
# Source: istio/templates/crds.yaml
#
# these CRDs only make sense when pilot is enabled
#
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: virtualservices.networking.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: istio-pilot
spec:
  group: networking.istio.io
  names:
    kind: VirtualService
    listKind: VirtualServiceList
    plural: virtualservices
    singular: virtualservice
    categories:
    - istio-io
    - networking-istio-io
  scope: Namespaced
  version: v1alpha3
---
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: destinationrules.networking.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: istio-pilot
spec:
  group: networking.istio.io
  names:
    kind: DestinationRule
    listKind: DestinationRuleList
    plural: destinationrules
    singular: destinationrule
    categories:
    - istio-io
    - networking-istio-io
  scope: Namespaced
  version: v1alpha3
---
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: serviceentries.networking.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: istio-pilot
spec:
  group: networking.istio.io
  names:
    kind: ServiceEntry
    listKind: ServiceEntryList
    plural: serviceentries
    singular: serviceentry
    categories:
    - istio-io
    - networking-istio-io
  scope: Namespaced
  version: v1alpha3
---
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: gateways.networking.istio.io
  annotations:
    "helm.sh/hook": crd-install
    "helm.sh/hook-weight": "-5"
  labels:
    app: istio-pilot
spec:
  group: networking.istio.io
  names:
    kind: Gateway
    plural: gateways
    singular: gateway
    categories:
    - istio-io
    - networking-istio-io
  scope: Namespaced
  version: v1alpha3
---
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: envoyfilters.networking.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: istio-pilot
spec:
  group: networking.istio.io
  names:
    kind: EnvoyFilter
    plural: envoyfilters
    singular: envoyfilter
    categories:
    - istio-io
    - networking-istio-io
  scope: Namespaced
  version: v1alpha3
---
#
# these CRDs only make sense when security is enabled
#
#
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  annotations:
    "helm.sh/hook": crd-install
  name: httpapispecbindings.config.istio.io
spec:
  group: config.istio.io
  names:
    kind: HTTPAPISpecBinding
    plural: httpapispecbindings
    singular: httpapispecbinding
    categories:
    - istio-io
    - apim-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  annotations:
    "helm.sh/hook": crd-install
  name: httpapispecs.config.istio.io
spec:
  group: config.istio.io
  names:
    kind: HTTPAPISpec
    plural: httpapispecs
    singular: httpapispec
    categories:
    - istio-io
    - apim-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  annotations:
    "helm.sh/hook": crd-install
  name: quotaspecbindings.config.istio.io
spec:
  group: config.istio.io
  names:
    kind: QuotaSpecBinding
    plural: quotaspecbindings
    singular: quotaspecbinding
    categories:
    - istio-io
    - apim-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  annotations:
    "helm.sh/hook": crd-install
  name: quotaspecs.config.istio.io
spec:
  group: config.istio.io
  names:
    kind: QuotaSpec
    plural: quotaspecs
    singular: quotaspec
    categories:
    - istio-io
    - apim-istio-io
  scope: Namespaced
  version: v1alpha2
---
# Mixer CRDs
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: rules.config.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: istio.io.mixer
    istio: core
spec:
  group: config.istio.io
  names:
    kind: rule
    plural: rules
    singular: rule
    categories:
    - istio-io
    - policy-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: attributemanifests.config.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: istio.io.mixer
    istio: core
spec:
  group: config.istio.io
  names:
    kind: attributemanifest
    plural: attributemanifests
    singular: attributemanifest
    categories:
    - istio-io
    - policy-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: bypasses.config.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: bypass
    istio: mixer-adapter
spec:
  group: config.istio.io
  names:
    kind: bypass
    plural: bypasses
    singular: bypass
    categories:
    - istio-io
    - policy-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: circonuses.config.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: circonus
    istio: mixer-adapter
spec:
  group: config.istio.io
  names:
    kind: circonus
    plural: circonuses
    singular: circonus
    categories:
    - istio-io
    - policy-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: deniers.config.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: denier
    istio: mixer-adapter
spec:
  group: config.istio.io
  names:
    kind: denier
    plural: deniers
    singular: denier
    categories:
    - istio-io
    - policy-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: fluentds.config.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: fluentd
    istio: mixer-adapter
spec:
  group: config.istio.io
  names:
    kind: fluentd
    plural: fluentds
    singular: fluentd
    categories:
    - istio-io
    - policy-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: kubernetesenvs.config.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: kubernetesenv
    istio: mixer-adapter
spec:
  group: config.istio.io
  names:
    kind: kubernetesenv
    plural: kubernetesenvs
    singular: kubernetesenv
    categories:
    - istio-io
    - policy-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: listcheckers.config.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: listchecker
    istio: mixer-adapter
spec:
  group: config.istio.io
  names:
    kind: listchecker
    plural: listcheckers
    singular: listchecker
    categories:
    - istio-io
    - policy-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: memquotas.config.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: memquota
    istio: mixer-adapter
spec:
  group: config.istio.io
  names:
    kind: memquota
    plural: memquotas
    singular: memquota
    categories:
    - istio-io
    - policy-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: noops.config.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: noop
    istio: mixer-adapter
spec:
  group: config.istio.io
  names:
    kind: noop
    plural: noops
    singular: noop
    categories:
    - istio-io
    - policy-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: opas.config.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: opa
    istio: mixer-adapter
spec:
  group: config.istio.io
  names:
    kind: opa
    plural: opas
    singular: opa
    categories:
    - istio-io
    - policy-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: prometheuses.config.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: prometheus
    istio: mixer-adapter
spec:
  group: config.istio.io
  names:
    kind: prometheus
    plural: prometheuses
    singular: prometheus
    categories:
    - istio-io
    - policy-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: rbacs.config.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: rbac
    istio: mixer-adapter
spec:
  group: config.istio.io
  names:
    kind: rbac
    plural: rbacs
    singular: rbac
    categories:
    - istio-io
    - policy-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: redisquotas.config.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    package: redisquota
    istio: mixer-adapter
spec:
  group: config.istio.io
  names:
    kind: redisquota
    plural: redisquotas
    singular: redisquota
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: servicecontrols.config.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: servicecontrol
    istio: mixer-adapter
spec:
  group: config.istio.io
  names:
    kind: servicecontrol
    plural: servicecontrols
    singular: servicecontrol
    categories:
    - istio-io
    - policy-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: signalfxs.config.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: signalfx
    istio: mixer-adapter
spec:
  group: config.istio.io
  names:
    kind: signalfx
    plural: signalfxs
    singular: signalfx
    categories:
    - istio-io
    - policy-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: solarwindses.config.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: solarwinds
    istio: mixer-adapter
spec:
  group: config.istio.io
  names:
    kind: solarwinds
    plural: solarwindses
    singular: solarwinds
    categories:
    - istio-io
    - policy-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: stackdrivers.config.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: stackdriver
    istio: mixer-adapter
spec:
  group: config.istio.io
  names:
    kind: stackdriver
    plural: stackdrivers
    singular: stackdriver
    categories:
    - istio-io
    - policy-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: statsds.config.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: statsd
    istio: mixer-adapter
spec:
  group: config.istio.io
  names:
    kind: statsd
    plural: statsds
    singular: statsd
    categories:
    - istio-io
    - policy-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: stdios.config.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: stdio
    istio: mixer-adapter
spec:
  group: config.istio.io
  names:
    kind: stdio
    plural: stdios
    singular: stdio
    categories:
    - istio-io
    - policy-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: apikeys.config.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: apikey
    istio: mixer-instance
spec:
  group: config.istio.io
  names:
    kind: apikey
    plural: apikeys
    singular: apikey
    categories:
    - istio-io
    - policy-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: authorizations.config.istio.io
  annotations:
"helm.sh/hook": crd-install labels: app: mixer package: authorization istio: mixer-instance spec: group: config.istio.io names: kind: authorization plural: authorizations singular: authorization categories: - istio-io - policy-istio-io scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: checknothings.config.istio.io annotations: "helm.sh/hook": crd-install labels: app: mixer package: checknothing istio: mixer-instance spec: group: config.istio.io names: kind: checknothing plural: checknothings singular: checknothing categories: - istio-io - policy-istio-io scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: kuberneteses.config.istio.io annotations: "helm.sh/hook": crd-install labels: app: mixer package: adapter.template.kubernetes istio: mixer-instance spec: group: config.istio.io names: kind: kubernetes plural: kuberneteses singular: kubernetes categories: - istio-io - policy-istio-io scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: listentries.config.istio.io annotations: "helm.sh/hook": crd-install labels: app: mixer package: listentry istio: mixer-instance spec: group: config.istio.io names: kind: listentry plural: listentries singular: listentry categories: - istio-io - policy-istio-io scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: logentries.config.istio.io annotations: "helm.sh/hook": crd-install labels: app: mixer package: logentry istio: mixer-instance spec: group: config.istio.io names: kind: logentry plural: logentries singular: logentry categories: - istio-io - policy-istio-io scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: edges.config.istio.io annotations: "helm.sh/hook": crd-install labels: app: mixer package: edge istio: mixer-instance spec: group: config.istio.io names: kind: edge plural: edges singular: edge categories: - istio-io - policy-istio-io scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: metrics.config.istio.io annotations: "helm.sh/hook": crd-install labels: app: mixer package: metric istio: mixer-instance spec: group: config.istio.io names: kind: metric plural: metrics singular: metric categories: - istio-io - policy-istio-io scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: quotas.config.istio.io annotations: "helm.sh/hook": crd-install labels: app: mixer package: quota istio: mixer-instance spec: group: config.istio.io names: kind: quota plural: quotas singular: quota categories: - istio-io - policy-istio-io scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: reportnothings.config.istio.io annotations: "helm.sh/hook": crd-install labels: app: mixer package: reportnothing istio: mixer-instance spec: group: config.istio.io names: kind: reportnothing plural: reportnothings singular: reportnothing categories: - istio-io - policy-istio-io scope: Namespaced version: v1alpha2 --- kind: CustomResourceDefinition apiVersion: apiextensions.k8s.io/v1beta1 metadata: name: servicecontrolreports.config.istio.io annotations: "helm.sh/hook": crd-install labels: 
    app: mixer
    package: servicecontrolreport
    istio: mixer-instance
spec:
  group: config.istio.io
  names:
    kind: servicecontrolreport
    plural: servicecontrolreports
    singular: servicecontrolreport
    categories:
    - istio-io
    - policy-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: tracespans.config.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: tracespan
    istio: mixer-instance
spec:
  group: config.istio.io
  names:
    kind: tracespan
    plural: tracespans
    singular: tracespan
    categories:
    - istio-io
    - policy-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: rbacconfigs.rbac.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: istio.io.mixer
    istio: rbac
spec:
  group: rbac.istio.io
  names:
    kind: RbacConfig
    plural: rbacconfigs
    singular: rbacconfig
    categories:
    - istio-io
    - rbac-istio-io
  scope: Namespaced
  version: v1alpha1
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: serviceroles.rbac.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: istio.io.mixer
    istio: rbac
spec:
  group: rbac.istio.io
  names:
    kind: ServiceRole
    plural: serviceroles
    singular: servicerole
    categories:
    - istio-io
    - rbac-istio-io
  scope: Namespaced
  version: v1alpha1
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: servicerolebindings.rbac.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: istio.io.mixer
    istio: rbac
spec:
  group: rbac.istio.io
  names:
    kind: ServiceRoleBinding
    plural: servicerolebindings
    singular: servicerolebinding
    categories:
    - istio-io
    - rbac-istio-io
  scope: Namespaced
  version: v1alpha1
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: adapters.config.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: adapter
    istio: mixer-adapter
spec:
  group: config.istio.io
  names:
    kind: adapter
    plural: adapters
    singular: adapter
    categories:
    - istio-io
    - policy-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: instances.config.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: instance
    istio: mixer-instance
spec:
  group: config.istio.io
  names:
    kind: instance
    plural: instances
    singular: instance
    categories:
    - istio-io
    - policy-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: templates.config.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: template
    istio: mixer-template
spec:
  group: config.istio.io
  names:
    kind: template
    plural: templates
    singular: template
    categories:
    - istio-io
    - policy-istio-io
  scope: Namespaced
  version: v1alpha2
---
kind: CustomResourceDefinition
apiVersion: apiextensions.k8s.io/v1beta1
metadata:
  name: handlers.config.istio.io
  annotations:
    "helm.sh/hook": crd-install
  labels:
    app: mixer
    package: handler
    istio: mixer-handler
spec:
  group: config.istio.io
  names:
    kind: handler
    plural: handlers
    singular: handler
    categories:
    - istio-io
    - policy-istio-io
  scope: Namespaced
  version: v1alpha2
---
#
#
---
# Source: istio/charts/galley/templates/clusterrole.yaml
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: istio-galley-istio-system
  labels:
    app: istio-galley
    chart: galley-1.0.0
    heritage: Tiller
    release: istio
rules:
- apiGroups: ["admissionregistration.k8s.io"]
  resources: ["validatingwebhookconfigurations"]
  verbs: ["*"]
- apiGroups: ["config.istio.io"] # istio mixer CRD watcher
  resources: ["*"]
  verbs: ["get", "list", "watch"]
- apiGroups: ["*"]
  resources: ["deployments"]
  resourceNames: ["istio-galley"]
  verbs: ["get"]
---
# Source: istio/charts/gateways/templates/clusterrole.yaml
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  labels:
    app: gateways
    chart: gateways-1.0.0
    heritage: Tiller
    release: istio
  name: istio-egressgateway-istio-system
rules:
- apiGroups: ["extensions"]
  resources: ["thirdpartyresources", "virtualservices", "destinationrules", "gateways"]
  verbs: ["get", "watch", "list", "update"]
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  labels:
    app: gateways
    chart: gateways-1.0.0
    heritage: Tiller
    release: istio
  name: istio-ingressgateway-istio-system
rules:
- apiGroups: ["extensions"]
  resources: ["thirdpartyresources", "virtualservices", "destinationrules", "gateways"]
  verbs: ["get", "watch", "list", "update"]
---
---
# Source: istio/charts/mixer/templates/clusterrole.yaml
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: istio-mixer-istio-system
  labels:
    app: mixer
    chart: mixer-1.0.0
    heritage: Tiller
    release: istio
rules:
- apiGroups: ["config.istio.io"] # istio CRD watcher
  resources: ["*"]
  verbs: ["create", "get", "list", "watch", "patch"]
- apiGroups: ["rbac.istio.io"] # istio RBAC watcher
  resources: ["*"]
  verbs: ["get", "list", "watch"]
- apiGroups: ["apiextensions.k8s.io"]
  resources: ["customresourcedefinitions"]
  verbs: ["get", "list", "watch"]
- apiGroups: [""]
  resources: ["configmaps", "endpoints", "pods", "services", "namespaces", "secrets"]
  verbs: ["get", "list", "watch"]
- apiGroups: ["extensions"]
  resources: ["replicasets"]
  verbs: ["get", "list", "watch"]
- apiGroups: ["apps"]
  resources: ["replicasets"]
  verbs: ["get", "list", "watch"]
---
# Source: istio/charts/pilot/templates/clusterrole.yaml
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: istio-pilot-istio-system
  labels:
    app: istio-pilot
    chart: pilot-1.0.0
    heritage: Tiller
    release: istio
rules:
- apiGroups: ["config.istio.io"]
  resources: ["*"]
  verbs: ["*"]
- apiGroups: ["rbac.istio.io"]
  resources: ["*"]
  verbs: ["get", "watch", "list"]
- apiGroups: ["networking.istio.io"]
  resources: ["*"]
  verbs: ["*"]
- apiGroups: ["authentication.istio.io"]
  resources: ["*"]
  verbs: ["*"]
- apiGroups: ["apiextensions.k8s.io"]
  resources: ["customresourcedefinitions"]
  verbs: ["*"]
- apiGroups: ["extensions"]
  resources: ["thirdpartyresources", "thirdpartyresources.extensions", "ingresses", "ingresses/status"]
  verbs: ["*"]
- apiGroups: [""]
  resources: ["configmaps"]
  verbs: ["create", "get", "list", "watch", "update"]
- apiGroups: [""]
  resources: ["endpoints", "pods", "services"]
  verbs: ["get", "list", "watch"]
- apiGroups: [""]
  resources: ["namespaces", "nodes", "secrets"]
  verbs: ["get", "list", "watch"]
---
# Source: istio/charts/prometheus/templates/clusterrole.yaml
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: prometheus-istio-system
rules:
- apiGroups: [""]
  resources:
  - nodes
  - services
  - endpoints
  - pods
  - nodes/proxy
  verbs: ["get", "list", "watch"]
- apiGroups: [""]
  resources:
  - configmaps
  verbs: ["get"]
- nonResourceURLs: ["/metrics"]
  verbs: ["get"]
---
# Source: istio/charts/security/templates/clusterrole.yaml
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: istio-citadel-istio-system
  labels:
    app: security
    chart: security-1.0.0
    heritage: Tiller
    release: istio
rules:
- apiGroups: [""]
  resources: ["secrets"]
  verbs: ["create", "get", "watch", "list", "update", "delete"]
- apiGroups: [""]
  resources: ["serviceaccounts"]
  verbs: ["get", "watch", "list"]
- apiGroups: [""]
  resources: ["services"]
  verbs: ["get", "watch", "list"]
---
# Source: istio/charts/sidecarInjectorWebhook/templates/clusterrole.yaml
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: istio-sidecar-injector-istio-system
  labels:
    app: istio-sidecar-injector
    chart: sidecarInjectorWebhook-1.0.0
    heritage: Tiller
    release: istio
rules:
- apiGroups: ["*"]
  resources: ["configmaps"]
  verbs: ["get", "list", "watch"]
- apiGroups: ["admissionregistration.k8s.io"]
  resources: ["mutatingwebhookconfigurations"]
  verbs: ["get", "list", "watch", "patch"]
---
# Source: istio/charts/galley/templates/clusterrolebinding.yaml
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: istio-galley-admin-role-binding-istio-system
  labels:
    app: istio-galley
    chart: galley-1.0.0
    heritage: Tiller
    release: istio
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: istio-galley-istio-system
subjects:
- kind: ServiceAccount
  name: istio-galley-service-account
  namespace: istio-system
---
# Source: istio/charts/gateways/templates/clusterrolebindings.yaml
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: istio-egressgateway-istio-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: istio-egressgateway-istio-system
subjects:
- kind: ServiceAccount
  name: istio-egressgateway-service-account
  namespace: istio-system
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: istio-ingressgateway-istio-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: istio-ingressgateway-istio-system
subjects:
- kind: ServiceAccount
  name: istio-ingressgateway-service-account
  namespace: istio-system
---
---
# Source: istio/charts/mixer/templates/clusterrolebinding.yaml
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: istio-mixer-admin-role-binding-istio-system
  labels:
    app: mixer
    chart: mixer-1.0.0
    heritage: Tiller
    release: istio
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: istio-mixer-istio-system
subjects:
- kind: ServiceAccount
  name: istio-mixer-service-account
  namespace: istio-system
---
# Source: istio/charts/pilot/templates/clusterrolebinding.yaml
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: istio-pilot-istio-system
  labels:
    app: istio-pilot
    chart: pilot-1.0.0
    heritage: Tiller
    release: istio
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: istio-pilot-istio-system
subjects:
- kind: ServiceAccount
  name: istio-pilot-service-account
  namespace: istio-system
---
# Source: istio/charts/prometheus/templates/clusterrolebindings.yaml
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: prometheus-istio-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus-istio-system
subjects:
- kind: ServiceAccount
  name: prometheus
  namespace: istio-system
---
# Source: istio/charts/security/templates/clusterrolebinding.yaml
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: istio-citadel-istio-system
  labels:
    app: security
    chart: security-1.0.0
    heritage: Tiller
    release: istio
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: istio-citadel-istio-system
subjects:
- kind: ServiceAccount
  name: istio-citadel-service-account
  namespace: istio-system
---
# Source: istio/charts/sidecarInjectorWebhook/templates/clusterrolebinding.yaml
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: istio-sidecar-injector-admin-role-binding-istio-system
  labels:
    app: istio-sidecar-injector
    chart: sidecarInjectorWebhook-1.0.0
    heritage: Tiller
    release: istio
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: istio-sidecar-injector-istio-system
subjects:
- kind: ServiceAccount
  name: istio-sidecar-injector-service-account
  namespace: istio-system
---
# Source: istio/charts/galley/templates/service.yaml
apiVersion: v1
kind: Service
metadata:
  name: istio-galley
  namespace: istio-system
  labels:
    istio: galley
spec:
  ports:
  - port: 443
    name: https-validation
  - port: 9093
    name: http-monitoring
  selector:
    istio: galley
---
# Source: istio/charts/gateways/templates/service.yaml
apiVersion: v1
kind: Service
metadata:
  name: istio-egressgateway
  namespace: istio-system
  annotations:
  labels:
    chart: gateways-1.0.0
    release: istio
    heritage: Tiller
    app: istio-egressgateway
    istio: egressgateway
spec:
  type: NodePort
  selector:
    app: istio-egressgateway
    istio: egressgateway
  ports:
  - name: http2
    port: 80
  - name: https
    port: 443
---
apiVersion: v1
kind: Service
metadata:
  name: istio-ingressgateway
  namespace: istio-system
  annotations:
  labels:
    chart: gateways-1.0.0
    release: istio
    heritage: Tiller
    app: istio-ingressgateway
    istio: ingressgateway
spec:
  type: NodePort
  selector:
    app: istio-ingressgateway
    istio: ingressgateway
  ports:
  - name: http2
    nodePort: 31380
    port: 80
    targetPort: 80
  - name: https
    nodePort: 31390
    port: 443
  - name: tcp
    nodePort: 31400
    port: 31400
  - name: tcp-pilot-grpc-tls
    port: 15011
    targetPort: 15011
  - name: tcp-citadel-grpc-tls
    port: 8060
    targetPort: 8060
  - name: http2-prometheus
    port: 15030
    targetPort: 15030
  - name: http2-grafana
    port: 15031
    targetPort: 15031
---
---
# Source: istio/charts/mixer/templates/service.yaml
apiVersion: v1
kind: Service
metadata:
  name: istio-policy
  namespace: istio-system
  labels:
    chart: mixer-1.0.0
    release: istio
    istio: mixer
spec:
  ports:
  - name: grpc-mixer
    port: 9091
  - name: grpc-mixer-mtls
    port: 15004
  - name: http-monitoring
    port: 9093
  selector:
    istio: mixer
    istio-mixer-type: policy
---
apiVersion: v1
kind: Service
metadata:
  name: istio-telemetry
  namespace: istio-system
  labels:
    chart: mixer-1.0.0
    release: istio
    istio: mixer
spec:
  ports:
  - name: grpc-mixer
    port: 9091
  - name: grpc-mixer-mtls
    port: 15004
  - name: http-monitoring
    port: 9093
  - name: prometheus
    port: 42422
  selector:
    istio: mixer
    istio-mixer-type: telemetry
---
---
# Source: istio/charts/mixer/templates/statsdtoprom.yaml
---
apiVersion: v1
kind: Service
metadata:
  name: istio-statsd-prom-bridge
  namespace: istio-system
  labels:
    chart: mixer-1.0.0
    release: istio
    istio: statsd-prom-bridge
spec:
  ports:
  - name: statsd-prom
    port: 9102
  - name: statsd-udp
    port: 9125
    protocol: UDP
  selector:
    istio: statsd-prom-bridge
---
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: istio-statsd-prom-bridge
  namespace: istio-system
  labels:
    chart: mixer-1.0.0
    release: istio
    istio: mixer
spec:
  template:
    metadata:
      labels:
        istio: statsd-prom-bridge
      annotations:
        sidecar.istio.io/inject: "false"
    spec:
      serviceAccountName: istio-mixer-service-account
      volumes:
      - name: config-volume
        configMap:
          name: istio-statsd-prom-bridge
      containers:
      - name: statsd-prom-bridge
        image: "docker.io/prom/statsd-exporter:v0.6.0"
        imagePullPolicy: IfNotPresent
        ports:
        - containerPort: 9102
        - containerPort: 9125
          protocol: UDP
        args:
        - '-statsd.mapping-config=/etc/statsd/mapping.conf'
        resources:
          requests:
            cpu: 10m
        volumeMounts:
        - name: config-volume
          mountPath: /etc/statsd
---
# Source: istio/charts/pilot/templates/service.yaml
apiVersion: v1
kind: Service
metadata:
  name: istio-pilot
  namespace: istio-system
  labels:
    app: istio-pilot
    chart: pilot-1.0.0
    release: istio
    heritage: Tiller
spec:
  ports:
  - port: 15010
    name: grpc-xds # direct
  - port: 15011
    name: https-xds # mTLS
  - port: 8080
    name: http-legacy-discovery # direct
  - port: 9093
    name: http-monitoring
  selector:
    istio: pilot
---
# Source: istio/charts/prometheus/templates/service.yaml
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: istio-system
  annotations:
    prometheus.io/scrape: 'true'
  labels:
    name: prometheus
spec:
  selector:
    app: prometheus
  ports:
  - name: http-prometheus
    protocol: TCP
    port: 9090
---
# Source: istio/charts/security/templates/service.yaml
apiVersion: v1
kind: Service
metadata:
  # we use the normal name here (e.g. 'prometheus')
  # as grafana is configured to use this as a data source
  name: istio-citadel
  namespace: istio-system
  labels:
    app: istio-citadel
spec:
  ports:
  - name: grpc-citadel
    port: 8060
    targetPort: 8060
    protocol: TCP
  - name: http-monitoring
    port: 9093
  selector:
    istio: citadel
---
# Source: istio/charts/sidecarInjectorWebhook/templates/service.yaml
apiVersion: v1
kind: Service
metadata:
  name: istio-sidecar-injector
  namespace: istio-system
  labels:
    istio: sidecar-injector
spec:
  ports:
  - port: 443
  selector:
    istio: sidecar-injector
---
# Source: istio/charts/galley/templates/deployment.yaml
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: istio-galley
  namespace: istio-system
  labels:
    app: galley
    chart: galley-1.0.0
    release: istio
    heritage: Tiller
    istio: galley
spec:
  replicas: 1
  strategy:
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
  template:
    metadata:
      labels:
        istio: galley
      annotations:
        sidecar.istio.io/inject: "false"
        scheduler.alpha.kubernetes.io/critical-pod: ""
    spec:
      serviceAccountName: istio-galley-service-account
      containers:
      - name: validator
        image: "docker.io/istio/galley:1.0.0"
        imagePullPolicy: IfNotPresent
        ports:
        - containerPort: 443
        - containerPort: 9093
        command:
        - /usr/local/bin/galley
        - validator
        - --deployment-namespace=istio-system
        - --caCertFile=/etc/istio/certs/root-cert.pem
        - --tlsCertFile=/etc/istio/certs/cert-chain.pem
        - --tlsKeyFile=/etc/istio/certs/key.pem
        - --healthCheckInterval=2s
        - --healthCheckFile=/health
        - --webhook-config-file
        - /etc/istio/config/validatingwebhookconfiguration.yaml
        volumeMounts:
        - name: certs
          mountPath: /etc/istio/certs
          readOnly: true
        - name: config
          mountPath: /etc/istio/config
          readOnly: true
        livenessProbe:
          exec:
            command:
            - /usr/local/bin/galley
            - probe
            - --probe-path=/health
            - --interval=4s
          initialDelaySeconds: 4
          periodSeconds: 4
        readinessProbe:
          exec:
            command:
            - /usr/local/bin/galley
            - probe
            - --probe-path=/health
            - --interval=4s
          initialDelaySeconds: 4
          periodSeconds: 4
        resources:
          requests:
            cpu: 10m
      volumes:
      - name: certs
        secret:
          secretName: istio.istio-galley-service-account
      - name: config
        configMap:
          name: istio-galley-configuration
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: beta.kubernetes.io/arch
                operator: In
                values:
                - amd64
                - ppc64le
                - s390x
          preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 2
            preference:
              matchExpressions:
              - key: beta.kubernetes.io/arch
                operator: In
                values:
                - amd64
          - weight: 2
            preference:
              matchExpressions:
              - key: beta.kubernetes.io/arch
                operator: In
                values:
                - ppc64le
          - weight: 2
            preference:
              matchExpressions:
              - key: beta.kubernetes.io/arch
                operator: In
                values:
                - s390x
---
# Source: istio/charts/gateways/templates/deployment.yaml
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: istio-egressgateway
  namespace: istio-system
  labels:
    app: egressgateway
    chart: gateways-1.0.0
    release: istio
    heritage: Tiller
    app: istio-egressgateway
    istio: egressgateway
spec:
  replicas: 1
  template:
    metadata:
      labels:
        app: istio-egressgateway
        istio: egressgateway
      annotations:
        sidecar.istio.io/inject: "false"
        scheduler.alpha.kubernetes.io/critical-pod: ""
    spec:
      serviceAccountName: istio-egressgateway-service-account
      containers:
      - name: egressgateway
        image: "docker.io/istio/proxyv2:1.0.0"
        imagePullPolicy: IfNotPresent
        ports:
        - containerPort: 80
        - containerPort: 443
        args:
        - proxy
        - router
        - -v
        - "2"
        - --discoveryRefreshDelay
        - '1s' #discoveryRefreshDelay
        - --drainDuration
        - '45s' #drainDuration
        - --parentShutdownDuration
        - '1m0s' #parentShutdownDuration
        - --connectTimeout
        - '10s' #connectTimeout
        - --serviceCluster
        - istio-egressgateway
        - --zipkinAddress
        - zipkin:9411
        - --statsdUdpAddress
        - istio-statsd-prom-bridge:9125
        - --proxyAdminPort
        - "15000"
        - --controlPlaneAuthPolicy
        - NONE
        - --discoveryAddress
        - istio-pilot.istio-system:8080
        resources:
          requests:
            cpu: 10m
        env:
        - name: POD_NAME
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: metadata.namespace
        - name: INSTANCE_IP
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: status.podIP
        - name: ISTIO_META_POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        volumeMounts:
        - name: istio-certs
          mountPath: /etc/certs
          readOnly: true
        - name: egressgateway-certs
          mountPath: "/etc/istio/egressgateway-certs"
          readOnly: true
        - name: egressgateway-ca-certs
          mountPath: "/etc/istio/egressgateway-ca-certs"
          readOnly: true
      volumes:
      - name: istio-certs
        secret:
          secretName: istio.istio-egressgateway-service-account
          optional: true
      - name: egressgateway-certs
        secret:
          secretName: "istio-egressgateway-certs"
          optional: true
      - name: egressgateway-ca-certs
        secret:
          secretName: "istio-egressgateway-ca-certs"
          optional: true
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: beta.kubernetes.io/arch
                operator: In
                values:
                - amd64
                - ppc64le
                - s390x
          preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 2
            preference:
              matchExpressions:
              - key: beta.kubernetes.io/arch
                operator: In
                values:
                - amd64
          - weight: 2
            preference:
              matchExpressions:
              - key: beta.kubernetes.io/arch
                operator: In
                values:
                - ppc64le
          - weight: 2
            preference:
              matchExpressions:
              - key: beta.kubernetes.io/arch
                operator: In
                values:
                - s390x
---
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: istio-ingressgateway
  namespace: istio-system
  labels:
    app: ingressgateway
    chart: gateways-1.0.0
    release: istio
    heritage: Tiller
    app: istio-ingressgateway
    istio: ingressgateway
spec:
  replicas: 1
  template:
    metadata:
      labels:
        app: istio-ingressgateway
        istio: ingressgateway
      annotations:
        sidecar.istio.io/inject: "false"
        scheduler.alpha.kubernetes.io/critical-pod: ""
    spec:
      serviceAccountName: istio-ingressgateway-service-account
      containers:
      - name: ingressgateway
        image: "docker.io/istio/proxyv2:1.0.0"
        imagePullPolicy: IfNotPresent
        ports:
        - containerPort: 80
        - containerPort: 443
        - containerPort: 31400
        - containerPort: 15011
        - containerPort: 8060
        - containerPort: 15030
        - containerPort: 15031
        args:
        - proxy
        - router
        - -v
        - "2"
        - --discoveryRefreshDelay
        - '1s' #discoveryRefreshDelay
        - --drainDuration
        - '45s' #drainDuration
        - --parentShutdownDuration
        - '1m0s' #parentShutdownDuration
        - --connectTimeout
        - '10s' #connectTimeout
        - --serviceCluster
        - istio-ingressgateway
        - --zipkinAddress
        - zipkin:9411
        - --statsdUdpAddress
        - istio-statsd-prom-bridge:9125
        - --proxyAdminPort
        - "15000"
        - --controlPlaneAuthPolicy
        - NONE
        - --discoveryAddress
        - istio-pilot.istio-system:8080
        resources:
          requests:
            cpu: 10m
        env:
        - name: POD_NAME
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: metadata.namespace
        - name: INSTANCE_IP
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: status.podIP
        - name: ISTIO_META_POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        volumeMounts:
        - name: istio-certs
          mountPath: /etc/certs
          readOnly: true
        - name: ingressgateway-certs
          mountPath: "/etc/istio/ingressgateway-certs"
          readOnly: true
        - name: ingressgateway-ca-certs
          mountPath: "/etc/istio/ingressgateway-ca-certs"
          readOnly: true
      volumes:
      - name: istio-certs
        secret:
          secretName: istio.istio-ingressgateway-service-account
          optional: true
      - name: ingressgateway-certs
        secret:
          secretName: "istio-ingressgateway-certs"
          optional: true
      - name: ingressgateway-ca-certs
        secret:
          secretName: "istio-ingressgateway-ca-certs"
          optional: true
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: beta.kubernetes.io/arch
                operator: In
                values:
                - amd64
                - ppc64le
                - s390x
          preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 2
            preference:
              matchExpressions:
              - key: beta.kubernetes.io/arch
                operator: In
                values:
                - amd64
          - weight: 2
            preference:
              matchExpressions:
              - key: beta.kubernetes.io/arch
                operator: In
                values:
                - ppc64le
          - weight: 2
            preference:
              matchExpressions:
              - key: beta.kubernetes.io/arch
                operator: In
                values:
                - s390x
---
---
# Source: istio/charts/mixer/templates/deployment.yaml
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: istio-policy
  namespace: istio-system
  labels:
    chart: mixer-1.0.0
    release: istio
    istio: mixer
spec:
  replicas: 1
  template:
    metadata:
      labels:
        app: policy
        istio: mixer
        istio-mixer-type: policy
      annotations:
        sidecar.istio.io/inject: "false"
        scheduler.alpha.kubernetes.io/critical-pod: ""
    spec:
      serviceAccountName: istio-mixer-service-account
      volumes:
      - name: istio-certs
        secret:
          secretName: istio.istio-mixer-service-account
          optional: true
      - name: uds-socket
        emptyDir: {}
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: beta.kubernetes.io/arch
                operator: In
                values:
                - amd64
                - ppc64le
                - s390x
          preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 2
            preference:
              matchExpressions:
              - key: beta.kubernetes.io/arch
                operator: In
                values:
                - amd64
          - weight: 2
            preference:
              matchExpressions:
              - key: beta.kubernetes.io/arch
                operator: In
                values:
                - ppc64le
          - weight: 2
            preference:
              matchExpressions:
              - key: beta.kubernetes.io/arch
                operator: In
                values:
                - s390x
      containers:
      - name: mixer
        image: "docker.io/istio/mixer:1.0.0"
        imagePullPolicy: IfNotPresent
        ports:
        - containerPort: 9093
        - containerPort: 42422
        args:
        - --address
        - unix:///sock/mixer.socket
        - --configStoreURL=k8s://
        - --configDefaultNamespace=istio-system
        - --trace_zipkin_url=http://zipkin:9411/api/v1/spans
        resources:
          requests:
            cpu: 10m
        volumeMounts:
        - name: uds-socket
          mountPath: /sock
        livenessProbe:
          httpGet:
            path: /version
            port: 9093
          initialDelaySeconds: 5
          periodSeconds: 5
      - name: istio-proxy
        image: "docker.io/istio/proxyv2:1.0.0"
        imagePullPolicy: IfNotPresent
        ports:
        - containerPort: 9091
        - containerPort: 15004
        args:
        - proxy
        - --serviceCluster
        - istio-policy
        - --templateFile
        - /etc/istio/proxy/envoy_policy.yaml.tmpl
        - --controlPlaneAuthPolicy
        - NONE
        env:
        - name: POD_NAME
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: metadata.namespace
        - name: INSTANCE_IP
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: status.podIP
        resources:
          requests:
            cpu: 10m
        volumeMounts:
        - name: istio-certs
          mountPath: /etc/certs
          readOnly: true
        - name: uds-socket
          mountPath: /sock
---
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: istio-telemetry
  namespace: istio-system
  labels:
    chart: mixer-1.0.0
    release: istio
    istio: mixer
spec:
  replicas: 1
  template:
    metadata:
      labels:
        app: telemetry
        istio: mixer
        istio-mixer-type: telemetry
      annotations:
        sidecar.istio.io/inject: "false"
        scheduler.alpha.kubernetes.io/critical-pod: ""
    spec:
      serviceAccountName: istio-mixer-service-account
      volumes:
      - name: istio-certs
        secret:
          secretName: istio.istio-mixer-service-account
          optional: true
      - name: uds-socket
        emptyDir: {}
      containers:
      - name: mixer
        image: "docker.io/istio/mixer:1.0.0"
        imagePullPolicy: IfNotPresent
        ports:
        - containerPort: 9093
        - containerPort: 42422
        args:
        - --address
        - unix:///sock/mixer.socket
        - --configStoreURL=k8s://
        - --configDefaultNamespace=istio-system
        - --trace_zipkin_url=http://zipkin:9411/api/v1/spans
        resources:
          requests:
            cpu: 10m
        volumeMounts:
        - name: uds-socket
          mountPath: /sock
        livenessProbe:
          httpGet:
            path: /version
            port: 9093
          initialDelaySeconds: 5
          periodSeconds: 5
      - name: istio-proxy
        image: "docker.io/istio/proxyv2:1.0.0"
        imagePullPolicy: IfNotPresent
        ports:
        - containerPort: 9091
        - containerPort: 15004
        args:
        - proxy
        - --serviceCluster
        - istio-telemetry
        - --templateFile
        - /etc/istio/proxy/envoy_telemetry.yaml.tmpl
        - --controlPlaneAuthPolicy
        - NONE
        env:
        - name: POD_NAME
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: metadata.namespace
        - name: INSTANCE_IP
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: status.podIP
        resources:
          requests:
            cpu: 10m
        volumeMounts:
        - name: istio-certs
          mountPath: /etc/certs
          readOnly: true
        - name: uds-socket
          mountPath: /sock
---
---
# Source: istio/charts/pilot/templates/deployment.yaml
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: istio-pilot
  namespace: istio-system
  # TODO: default template doesn't have this, which one is right ?
labels: app: istio-pilot chart: pilot-1.0.0 release: istio heritage: Tiller istio: pilot annotations: checksum/config-volume: f8da08b6b8c170dde721efd680270b2901e750d4aa186ebb6c22bef5b78a43f9 spec: replicas: 1 template: metadata: labels: istio: pilot app: pilot annotations: sidecar.istio.io/inject: "false" scheduler.alpha.kubernetes.io/critical-pod: "" spec: serviceAccountName: istio-pilot-service-account containers: - name: discovery image: "docker.io/istio/pilot:1.0.0" imagePullPolicy: IfNotPresent args: - "discovery" ports: - containerPort: 8080 - containerPort: 15010 readinessProbe: httpGet: path: /debug/endpointz port: 8080 initialDelaySeconds: 30 periodSeconds: 30 timeoutSeconds: 5 env: - name: POD_NAME valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.name - name: POD_NAMESPACE valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.namespace - name: PILOT_THROTTLE value: "500" - name: PILOT_CACHE_SQUASH value: "5" - name: PILOT_TRACE_SAMPLING value: "100" resources: requests: cpu: 500m memory: 2048Mi volumeMounts: - name: config-volume mountPath: /etc/istio/config - name: istio-certs mountPath: /etc/certs readOnly: true - name: istio-proxy image: "docker.io/istio/proxyv2:1.0.0" imagePullPolicy: IfNotPresent ports: - containerPort: 15003 - containerPort: 15005 - containerPort: 15007 - containerPort: 15011 args: - proxy - --serviceCluster - istio-pilot - --templateFile - /etc/istio/proxy/envoy_pilot.yaml.tmpl - --controlPlaneAuthPolicy - NONE env: - name: POD_NAME valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.name - name: POD_NAMESPACE valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.namespace - name: INSTANCE_IP valueFrom: fieldRef: apiVersion: v1 fieldPath: status.podIP resources: requests: cpu: 10m volumeMounts: - name: istio-certs mountPath: /etc/certs readOnly: true volumes: - name: config-volume configMap: name: istio - name: istio-certs secret: secretName: istio.istio-pilot-service-account affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - ppc64le - s390x preferredDuringSchedulingIgnoredDuringExecution: - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - ppc64le - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - s390x --- # Source: istio/charts/prometheus/templates/deployment.yaml # TODO: the original template has service account, roles, etc apiVersion: extensions/v1beta1 kind: Deployment metadata: name: prometheus namespace: istio-system labels: app: prometheus chart: prometheus-0.1.0 release: istio heritage: Tiller spec: replicas: 1 selector: matchLabels: app: prometheus template: metadata: labels: app: prometheus annotations: sidecar.istio.io/inject: "false" scheduler.alpha.kubernetes.io/critical-pod: "" spec: serviceAccountName: prometheus containers: - name: prometheus image: "docker.io/prom/prometheus:v2.3.1" imagePullPolicy: IfNotPresent args: - '--storage.tsdb.retention=6h' - '--config.file=/etc/prometheus/prometheus.yml' ports: - containerPort: 9090 name: http livenessProbe: httpGet: path: /-/healthy port: 9090 readinessProbe: httpGet: path: /-/ready port: 9090 resources: requests: cpu: 10m volumeMounts: - name: config-volume mountPath: /etc/prometheus volumes: - name: config-volume configMap: name: prometheus affinity: nodeAffinity: 
requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - ppc64le - s390x preferredDuringSchedulingIgnoredDuringExecution: - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - ppc64le - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - s390x --- # Source: istio/charts/security/templates/deployment.yaml # istio CA watching all namespaces apiVersion: extensions/v1beta1 kind: Deployment metadata: name: istio-citadel namespace: istio-system labels: app: security chart: security-1.0.0 release: istio heritage: Tiller istio: citadel spec: replicas: 1 template: metadata: labels: istio: citadel annotations: sidecar.istio.io/inject: "false" scheduler.alpha.kubernetes.io/critical-pod: "" spec: serviceAccountName: istio-citadel-service-account containers: - name: citadel image: "docker.io/istio/citadel:1.0.0" imagePullPolicy: IfNotPresent args: - --append-dns-names=true - --grpc-port=8060 - --grpc-hostname=citadel - --citadel-storage-namespace=istio-system - --self-signed-ca=true resources: requests: cpu: 10m affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - ppc64le - s390x preferredDuringSchedulingIgnoredDuringExecution: - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - ppc64le - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - s390x --- # Source: istio/charts/sidecarInjectorWebhook/templates/deployment.yaml apiVersion: extensions/v1beta1 kind: Deployment metadata: name: istio-sidecar-injector namespace: istio-system labels: app: sidecarInjectorWebhook chart: sidecarInjectorWebhook-1.0.0 release: istio heritage: Tiller istio: sidecar-injector spec: replicas: 1 template: metadata: labels: istio: sidecar-injector annotations: sidecar.istio.io/inject: "false" scheduler.alpha.kubernetes.io/critical-pod: "" spec: serviceAccountName: istio-sidecar-injector-service-account containers: - name: sidecar-injector-webhook image: "docker.io/istio/sidecar_injector:1.0.0" imagePullPolicy: IfNotPresent args: - --caCertFile=/etc/istio/certs/root-cert.pem - --tlsCertFile=/etc/istio/certs/cert-chain.pem - --tlsKeyFile=/etc/istio/certs/key.pem - --injectConfig=/etc/istio/inject/config - --meshConfig=/etc/istio/config/mesh - --healthCheckInterval=2s - --healthCheckFile=/health volumeMounts: - name: config-volume mountPath: /etc/istio/config readOnly: true - name: certs mountPath: /etc/istio/certs readOnly: true - name: inject-config mountPath: /etc/istio/inject readOnly: true livenessProbe: exec: command: - /usr/local/bin/sidecar-injector - probe - --probe-path=/health - --interval=4s initialDelaySeconds: 4 periodSeconds: 4 readinessProbe: exec: command: - /usr/local/bin/sidecar-injector - probe - --probe-path=/health - --interval=4s initialDelaySeconds: 4 periodSeconds: 4 resources: requests: cpu: 10m volumes: - name: config-volume configMap: name: istio - name: certs secret: secretName: istio.istio-sidecar-injector-service-account - name: inject-config configMap: name: istio-sidecar-injector items: - key: config path: config affinity: 
nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - ppc64le - s390x preferredDuringSchedulingIgnoredDuringExecution: - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - amd64 - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - ppc64le - weight: 2 preference: matchExpressions: - key: beta.kubernetes.io/arch operator: In values: - s390x --- # Source: istio/charts/pilot/templates/gateway.yaml apiVersion: networking.istio.io/v1alpha3 kind: Gateway metadata: name: istio-autogenerated-k8s-ingress namespace: istio-system spec: selector: istio: ingress servers: - port: number: 80 protocol: HTTP2 name: http hosts: - "*" --- --- # Source: istio/charts/gateways/templates/autoscale.yaml apiVersion: autoscaling/v2beta1 kind: HorizontalPodAutoscaler metadata: name: istio-egressgateway namespace: istio-system spec: maxReplicas: 5 minReplicas: 1 scaleTargetRef: apiVersion: apps/v1beta1 kind: Deployment name: istio-egressgateway metrics: - type: Resource resource: name: cpu targetAverageUtilization: 60 --- apiVersion: autoscaling/v2beta1 kind: HorizontalPodAutoscaler metadata: name: istio-ingressgateway namespace: istio-system spec: maxReplicas: 5 minReplicas: 1 scaleTargetRef: apiVersion: apps/v1beta1 kind: Deployment name: istio-ingressgateway metrics: - type: Resource resource: name: cpu targetAverageUtilization: 60 --- --- # Source: istio/charts/mixer/templates/autoscale.yaml apiVersion: autoscaling/v2beta1 kind: HorizontalPodAutoscaler metadata: name: istio-policy namespace: istio-system spec: maxReplicas: 5 minReplicas: 1 scaleTargetRef: apiVersion: apps/v1beta1 kind: Deployment name: istio-policy metrics: - type: Resource resource: name: cpu targetAverageUtilization: 80 --- apiVersion: autoscaling/v2beta1 kind: HorizontalPodAutoscaler metadata: name: istio-telemetry namespace: istio-system spec: maxReplicas: 5 minReplicas: 1 scaleTargetRef: apiVersion: apps/v1beta1 kind: Deployment name: istio-telemetry metrics: - type: Resource resource: name: cpu targetAverageUtilization: 80 --- --- # Source: istio/charts/pilot/templates/autoscale.yaml apiVersion: autoscaling/v2beta1 kind: HorizontalPodAutoscaler metadata: name: istio-pilot spec: maxReplicas: 1 minReplicas: 1 scaleTargetRef: apiVersion: apps/v1beta1 kind: Deployment name: istio-pilot metrics: - type: Resource resource: name: cpu targetAverageUtilization: 55 --- --- # Source: istio/charts/sidecarInjectorWebhook/templates/mutatingwebhook.yaml apiVersion: admissionregistration.k8s.io/v1beta1 kind: MutatingWebhookConfiguration metadata: name: istio-sidecar-injector namespace: istio-system labels: app: istio-sidecar-injector chart: sidecarInjectorWebhook-1.0.0 release: istio heritage: Tiller webhooks: - name: sidecar-injector.istio.io clientConfig: service: name: istio-sidecar-injector namespace: istio-system path: "/inject" caBundle: "" rules: - operations: [ "CREATE" ] apiGroups: [""] apiVersions: ["v1"] resources: ["pods"] failurePolicy: Fail namespaceSelector: matchLabels: istio-injection: enabled --- # Source: istio/charts/galley/templates/validatingwehookconfiguration.yaml.tpl --- # Source: istio/charts/pilot/templates/meshexpansion.yaml --- # Source: istio/charts/security/templates/create-custom-resources-job.yaml --- # Source: istio/charts/security/templates/enable-mesh-mtls.yaml --- # Source: istio/charts/security/templates/meshexpansion.yaml --- 
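# Note: the MutatingWebhookConfiguration above injects sidecars only into
# namespaces carrying the `istio-injection=enabled` label; for example, to
# opt in the default namespace:
#
#   kubectl label namespace default istio-injection=enabled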
--- # Source: istio/charts/telemetry-gateway/templates/gateway.yaml --- # Source: istio/templates/install-custom-resources.sh.tpl --- # Source: istio/charts/mixer/templates/config.yaml apiVersion: "config.istio.io/v1alpha2" kind: attributemanifest metadata: name: istioproxy namespace: istio-system spec: attributes: origin.ip: valueType: IP_ADDRESS origin.uid: valueType: STRING origin.user: valueType: STRING request.headers: valueType: STRING_MAP request.id: valueType: STRING request.host: valueType: STRING request.method: valueType: STRING request.path: valueType: STRING request.reason: valueType: STRING request.referer: valueType: STRING request.scheme: valueType: STRING request.total_size: valueType: INT64 request.size: valueType: INT64 request.time: valueType: TIMESTAMP request.useragent: valueType: STRING response.code: valueType: INT64 response.duration: valueType: DURATION response.headers: valueType: STRING_MAP response.total_size: valueType: INT64 response.size: valueType: INT64 response.time: valueType: TIMESTAMP source.uid: valueType: STRING source.user: # DEPRECATED valueType: STRING source.principal: valueType: STRING destination.uid: valueType: STRING destination.principal: valueType: STRING destination.port: valueType: INT64 connection.event: valueType: STRING connection.id: valueType: STRING connection.received.bytes: valueType: INT64 connection.received.bytes_total: valueType: INT64 connection.sent.bytes: valueType: INT64 connection.sent.bytes_total: valueType: INT64 connection.duration: valueType: DURATION connection.mtls: valueType: BOOL context.protocol: valueType: STRING context.timestamp: valueType: TIMESTAMP context.time: valueType: TIMESTAMP # Deprecated, kept for compatibility context.reporter.local: valueType: BOOL context.reporter.kind: valueType: STRING context.reporter.uid: valueType: STRING api.service: valueType: STRING api.version: valueType: STRING api.operation: valueType: STRING api.protocol: valueType: STRING request.auth.principal: valueType: STRING request.auth.audiences: valueType: STRING request.auth.presenter: valueType: STRING request.auth.claims: valueType: STRING_MAP request.auth.raw_claims: valueType: STRING request.api_key: valueType: STRING --- apiVersion: "config.istio.io/v1alpha2" kind: attributemanifest metadata: name: kubernetes namespace: istio-system spec: attributes: source.ip: valueType: IP_ADDRESS source.labels: valueType: STRING_MAP source.metadata: valueType: STRING_MAP source.name: valueType: STRING source.namespace: valueType: STRING source.owner: valueType: STRING source.service: # DEPRECATED valueType: STRING source.serviceAccount: valueType: STRING source.services: valueType: STRING source.workload.uid: valueType: STRING source.workload.name: valueType: STRING source.workload.namespace: valueType: STRING destination.ip: valueType: IP_ADDRESS destination.labels: valueType: STRING_MAP destination.metadata: valueType: STRING_MAP destination.owner: valueType: STRING destination.name: valueType: STRING destination.container.name: valueType: STRING destination.namespace: valueType: STRING destination.service: # DEPRECATED valueType: STRING destination.service.uid: valueType: STRING destination.service.name: valueType: STRING destination.service.namespace: valueType: STRING destination.service.host: valueType: STRING destination.serviceAccount: valueType: STRING destination.workload.uid: valueType: STRING destination.workload.name: valueType: STRING destination.workload.namespace: valueType: STRING --- apiVersion: 
"config.istio.io/v1alpha2" kind: stdio metadata: name: handler namespace: istio-system spec: outputAsJson: true --- apiVersion: "config.istio.io/v1alpha2" kind: logentry metadata: name: accesslog namespace: istio-system spec: severity: '"Info"' timestamp: request.time variables: sourceIp: source.ip | ip("0.0.0.0") sourceApp: source.labels["app"] | "" sourcePrincipal: source.principal | "" sourceName: source.name | "" sourceWorkload: source.workload.name | "" sourceNamespace: source.namespace | "" sourceOwner: source.owner | "" destinationApp: destination.labels["app"] | "" destinationIp: destination.ip | ip("0.0.0.0") destinationServiceHost: destination.service.host | "" destinationWorkload: destination.workload.name | "" destinationName: destination.name | "" destinationNamespace: destination.namespace | "" destinationOwner: destination.owner | "" destinationPrincipal: destination.principal | "" apiClaims: request.auth.raw_claims | "" apiKey: request.api_key | request.headers["x-api-key"] | "" protocol: request.scheme | context.protocol | "http" method: request.method | "" url: request.path | "" responseCode: response.code | 0 responseSize: response.size | 0 requestSize: request.size | 0 requestId: request.headers["x-request-id"] | "" clientTraceId: request.headers["x-client-trace-id"] | "" latency: response.duration | "0ms" connection_security_policy: conditional((context.reporter.kind | "inbound") == "outbound", "unknown", conditional(connection.mtls | false, "mutual_tls", "none")) userAgent: request.useragent | "" responseTimestamp: response.time receivedBytes: request.total_size | 0 sentBytes: response.total_size | 0 referer: request.referer | "" httpAuthority: request.headers[":authority"] | request.host | "" xForwardedFor: request.headers["x-forwarded-for"] | "0.0.0.0" reporter: conditional((context.reporter.kind | "inbound") == "outbound", "source", "destination") monitored_resource_type: '"global"' --- apiVersion: "config.istio.io/v1alpha2" kind: logentry metadata: name: tcpaccesslog namespace: istio-system spec: severity: '"Info"' timestamp: context.time | timestamp("2017-01-01T00:00:00Z") variables: connectionEvent: connection.event | "" sourceIp: source.ip | ip("0.0.0.0") sourceApp: source.labels["app"] | "" sourcePrincipal: source.principal | "" sourceName: source.name | "" sourceWorkload: source.workload.name | "" sourceNamespace: source.namespace | "" sourceOwner: source.owner | "" destinationApp: destination.labels["app"] | "" destinationIp: destination.ip | ip("0.0.0.0") destinationServiceHost: destination.service.host | "" destinationWorkload: destination.workload.name | "" destinationName: destination.name | "" destinationNamespace: destination.namespace | "" destinationOwner: destination.owner | "" destinationPrincipal: destination.principal | "" protocol: context.protocol | "tcp" connectionDuration: connection.duration | "0ms" connection_security_policy: conditional((context.reporter.kind | "inbound") == "outbound", "unknown", conditional(connection.mtls | false, "mutual_tls", "none")) receivedBytes: connection.received.bytes | 0 sentBytes: connection.sent.bytes | 0 totalReceivedBytes: connection.received.bytes_total | 0 totalSentBytes: connection.sent.bytes_total | 0 reporter: conditional((context.reporter.kind | "inbound") == "outbound", "source", "destination") monitored_resource_type: '"global"' --- apiVersion: "config.istio.io/v1alpha2" kind: rule metadata: name: stdio namespace: istio-system spec: match: context.protocol == "http" || context.protocol == "grpc" 
actions: - handler: handler.stdio instances: - accesslog.logentry --- apiVersion: "config.istio.io/v1alpha2" kind: rule metadata: name: stdiotcp namespace: istio-system spec: match: context.protocol == "tcp" actions: - handler: handler.stdio instances: - tcpaccesslog.logentry --- apiVersion: "config.istio.io/v1alpha2" kind: metric metadata: name: requestcount namespace: istio-system spec: value: "1" dimensions: reporter: conditional((context.reporter.kind | "inbound") == "outbound", "source", "destination") source_workload: source.workload.name | "unknown" source_workload_namespace: source.workload.namespace | "unknown" source_principal: source.principal | "unknown" source_app: source.labels["app"] | "unknown" source_version: source.labels["version"] | "unknown" destination_workload: destination.workload.name | "unknown" destination_workload_namespace: destination.workload.namespace | "unknown" destination_principal: destination.principal | "unknown" destination_app: destination.labels["app"] | "unknown" destination_version: destination.labels["version"] | "unknown" destination_service: destination.service.host | "unknown" destination_service_name: destination.service.name | "unknown" destination_service_namespace: destination.service.namespace | "unknown" request_protocol: api.protocol | context.protocol | "unknown" response_code: response.code | 200 connection_security_policy: conditional((context.reporter.kind | "inbound") == "outbound", "unknown", conditional(connection.mtls | false, "mutual_tls", "none")) monitored_resource_type: '"UNSPECIFIED"' --- apiVersion: "config.istio.io/v1alpha2" kind: metric metadata: name: requestduration namespace: istio-system spec: value: response.duration | "0ms" dimensions: reporter: conditional((context.reporter.kind | "inbound") == "outbound", "source", "destination") source_workload: source.workload.name | "unknown" source_workload_namespace: source.workload.namespace | "unknown" source_principal: source.principal | "unknown" source_app: source.labels["app"] | "unknown" source_version: source.labels["version"] | "unknown" destination_workload: destination.workload.name | "unknown" destination_workload_namespace: destination.workload.namespace | "unknown" destination_principal: destination.principal | "unknown" destination_app: destination.labels["app"] | "unknown" destination_version: destination.labels["version"] | "unknown" destination_service: destination.service.host | "unknown" destination_service_name: destination.service.name | "unknown" destination_service_namespace: destination.service.namespace | "unknown" request_protocol: api.protocol | context.protocol | "unknown" response_code: response.code | 200 connection_security_policy: conditional((context.reporter.kind | "inbound") == "outbound", "unknown", conditional(connection.mtls | false, "mutual_tls", "none")) monitored_resource_type: '"UNSPECIFIED"' --- apiVersion: "config.istio.io/v1alpha2" kind: metric metadata: name: requestsize namespace: istio-system spec: value: request.size | 0 dimensions: reporter: conditional((context.reporter.kind | "inbound") == "outbound", "source", "destination") source_workload: source.workload.name | "unknown" source_workload_namespace: source.workload.namespace | "unknown" source_principal: source.principal | "unknown" source_app: source.labels["app"] | "unknown" source_version: source.labels["version"] | "unknown" destination_workload: destination.workload.name | "unknown" destination_workload_namespace: destination.workload.namespace | "unknown" 
destination_principal: destination.principal | "unknown" destination_app: destination.labels["app"] | "unknown" destination_version: destination.labels["version"] | "unknown" destination_service: destination.service.host | "unknown" destination_service_name: destination.service.name | "unknown" destination_service_namespace: destination.service.namespace | "unknown" request_protocol: api.protocol | context.protocol | "unknown" response_code: response.code | 200 connection_security_policy: conditional((context.reporter.kind | "inbound") == "outbound", "unknown", conditional(connection.mtls | false, "mutual_tls", "none")) monitored_resource_type: '"UNSPECIFIED"' --- apiVersion: "config.istio.io/v1alpha2" kind: metric metadata: name: responsesize namespace: istio-system spec: value: response.size | 0 dimensions: reporter: conditional((context.reporter.kind | "inbound") == "outbound", "source", "destination") source_workload: source.workload.name | "unknown" source_workload_namespace: source.workload.namespace | "unknown" source_principal: source.principal | "unknown" source_app: source.labels["app"] | "unknown" source_version: source.labels["version"] | "unknown" destination_workload: destination.workload.name | "unknown" destination_workload_namespace: destination.workload.namespace | "unknown" destination_principal: destination.principal | "unknown" destination_app: destination.labels["app"] | "unknown" destination_version: destination.labels["version"] | "unknown" destination_service: destination.service.host | "unknown" destination_service_name: destination.service.name | "unknown" destination_service_namespace: destination.service.namespace | "unknown" request_protocol: api.protocol | context.protocol | "unknown" response_code: response.code | 200 connection_security_policy: conditional((context.reporter.kind | "inbound") == "outbound", "unknown", conditional(connection.mtls | false, "mutual_tls", "none")) monitored_resource_type: '"UNSPECIFIED"' --- apiVersion: "config.istio.io/v1alpha2" kind: metric metadata: name: tcpbytesent namespace: istio-system spec: value: connection.sent.bytes | 0 dimensions: reporter: conditional((context.reporter.kind | "inbound") == "outbound", "source", "destination") source_workload: source.workload.name | "unknown" source_workload_namespace: source.workload.namespace | "unknown" source_principal: source.principal | "unknown" source_app: source.labels["app"] | "unknown" source_version: source.labels["version"] | "unknown" destination_workload: destination.workload.name | "unknown" destination_workload_namespace: destination.workload.namespace | "unknown" destination_principal: destination.principal | "unknown" destination_app: destination.labels["app"] | "unknown" destination_version: destination.labels["version"] | "unknown" destination_service: destination.service.name | "unknown" destination_service_name: destination.service.name | "unknown" destination_service_namespace: destination.service.namespace | "unknown" connection_security_policy: conditional((context.reporter.kind | "inbound") == "outbound", "unknown", conditional(connection.mtls | false, "mutual_tls", "none")) monitored_resource_type: '"UNSPECIFIED"' --- apiVersion: "config.istio.io/v1alpha2" kind: metric metadata: name: tcpbytereceived namespace: istio-system spec: value: connection.received.bytes | 0 dimensions: reporter: conditional((context.reporter.kind | "inbound") == "outbound", "source", "destination") source_workload: source.workload.name | "unknown" source_workload_namespace: 
source.workload.namespace | "unknown" source_principal: source.principal | "unknown" source_app: source.labels["app"] | "unknown" source_version: source.labels["version"] | "unknown" destination_workload: destination.workload.name | "unknown" destination_workload_namespace: destination.workload.namespace | "unknown" destination_principal: destination.principal | "unknown" destination_app: destination.labels["app"] | "unknown" destination_version: destination.labels["version"] | "unknown" destination_service: destination.service.name | "unknown" destination_service_name: destination.service.name | "unknown" destination_service_namespace: destination.service.namespace | "unknown" connection_security_policy: conditional((context.reporter.kind | "inbound") == "outbound", "unknown", conditional(connection.mtls | false, "mutual_tls", "none")) monitored_resource_type: '"UNSPECIFIED"' --- apiVersion: "config.istio.io/v1alpha2" kind: prometheus metadata: name: handler namespace: istio-system spec: metrics: - name: requests_total instance_name: requestcount.metric.istio-system kind: COUNTER label_names: - reporter - source_app - source_principal - source_workload - source_workload_namespace - source_version - destination_app - destination_principal - destination_workload - destination_workload_namespace - destination_version - destination_service - destination_service_name - destination_service_namespace - request_protocol - response_code - connection_security_policy - name: request_duration_seconds instance_name: requestduration.metric.istio-system kind: DISTRIBUTION label_names: - reporter - source_app - source_principal - source_workload - source_workload_namespace - source_version - destination_app - destination_principal - destination_workload - destination_workload_namespace - destination_version - destination_service - destination_service_name - destination_service_namespace - request_protocol - response_code - connection_security_policy buckets: explicit_buckets: bounds: [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10] - name: request_bytes instance_name: requestsize.metric.istio-system kind: DISTRIBUTION label_names: - reporter - source_app - source_principal - source_workload - source_workload_namespace - source_version - destination_app - destination_principal - destination_workload - destination_workload_namespace - destination_version - destination_service - destination_service_name - destination_service_namespace - request_protocol - response_code - connection_security_policy buckets: exponentialBuckets: numFiniteBuckets: 8 scale: 1 growthFactor: 10 - name: response_bytes instance_name: responsesize.metric.istio-system kind: DISTRIBUTION label_names: - reporter - source_app - source_principal - source_workload - source_workload_namespace - source_version - destination_app - destination_principal - destination_workload - destination_workload_namespace - destination_version - destination_service - destination_service_name - destination_service_namespace - request_protocol - response_code - connection_security_policy buckets: exponentialBuckets: numFiniteBuckets: 8 scale: 1 growthFactor: 10 - name: tcp_sent_bytes_total instance_name: tcpbytesent.metric.istio-system kind: COUNTER label_names: - reporter - source_app - source_principal - source_workload - source_workload_namespace - source_version - destination_app - destination_principal - destination_workload - destination_workload_namespace - destination_version - destination_service - destination_service_name - 
destination_service_namespace - connection_security_policy - name: tcp_received_bytes_total instance_name: tcpbytereceived.metric.istio-system kind: COUNTER label_names: - reporter - source_app - source_principal - source_workload - source_workload_namespace - source_version - destination_app - destination_principal - destination_workload - destination_workload_namespace - destination_version - destination_service - destination_service_name - destination_service_namespace - connection_security_policy --- apiVersion: "config.istio.io/v1alpha2" kind: rule metadata: name: promhttp namespace: istio-system spec: match: context.protocol == "http" || context.protocol == "grpc" actions: - handler: handler.prometheus instances: - requestcount.metric - requestduration.metric - requestsize.metric - responsesize.metric --- apiVersion: "config.istio.io/v1alpha2" kind: rule metadata: name: promtcp namespace: istio-system spec: match: context.protocol == "tcp" actions: - handler: handler.prometheus instances: - tcpbytesent.metric - tcpbytereceived.metric --- apiVersion: "config.istio.io/v1alpha2" kind: kubernetesenv metadata: name: handler namespace: istio-system spec: # when running from mixer root, use the following config after adding a # symbolic link to a kubernetes config file via: # # $ ln -s ~/.kube/config mixer/adapter/kubernetes/kubeconfig # # kubeconfig_path: "mixer/adapter/kubernetes/kubeconfig" --- apiVersion: "config.istio.io/v1alpha2" kind: rule metadata: name: kubeattrgenrulerule namespace: istio-system spec: actions: - handler: handler.kubernetesenv instances: - attributes.kubernetes --- apiVersion: "config.istio.io/v1alpha2" kind: rule metadata: name: tcpkubeattrgenrulerule namespace: istio-system spec: match: context.protocol == "tcp" actions: - handler: handler.kubernetesenv instances: - attributes.kubernetes --- apiVersion: "config.istio.io/v1alpha2" kind: kubernetes metadata: name: attributes namespace: istio-system spec: # Pass the required attribute data to the adapter source_uid: source.uid | "" source_ip: source.ip | ip("0.0.0.0") # default to unspecified ip addr destination_uid: destination.uid | "" destination_port: destination.port | 0 attribute_bindings: # Fill the new attributes from the adapter produced output. 
    # $out refers to an instance of OutputTemplate message
    source.ip: $out.source_pod_ip | ip("0.0.0.0")
    source.uid: $out.source_pod_uid | "unknown"
    source.labels: $out.source_labels | emptyStringMap()
    source.name: $out.source_pod_name | "unknown"
    source.namespace: $out.source_namespace | "default"
    source.owner: $out.source_owner | "unknown"
    source.serviceAccount: $out.source_service_account_name | "unknown"
    source.workload.uid: $out.source_workload_uid | "unknown"
    source.workload.name: $out.source_workload_name | "unknown"
    source.workload.namespace: $out.source_workload_namespace | "unknown"
    destination.ip: $out.destination_pod_ip | ip("0.0.0.0")
    destination.uid: $out.destination_pod_uid | "unknown"
    destination.labels: $out.destination_labels | emptyStringMap()
    destination.name: $out.destination_pod_name | "unknown"
    destination.container.name: $out.destination_container_name | "unknown"
    destination.namespace: $out.destination_namespace | "default"
    destination.owner: $out.destination_owner | "unknown"
    destination.serviceAccount: $out.destination_service_account_name | "unknown"
    destination.workload.uid: $out.destination_workload_uid | "unknown"
    destination.workload.name: $out.destination_workload_name | "unknown"
    destination.workload.namespace: $out.destination_workload_namespace | "unknown"
---
# Configuration needed by Mixer.
# Mixer cluster is delivered via CDS
# Specify mixer cluster settings
apiVersion: networking.istio.io/v1alpha3
kind: DestinationRule
metadata:
  name: istio-policy
  namespace: istio-system
spec:
  host: istio-policy.istio-system.svc.cluster.local
  trafficPolicy:
    connectionPool:
      http:
        http2MaxRequests: 10000
        maxRequestsPerConnection: 10000
---
apiVersion: networking.istio.io/v1alpha3
kind: DestinationRule
metadata:
  name: istio-telemetry
  namespace: istio-system
spec:
  host: istio-telemetry.istio-system.svc.cluster.local
  trafficPolicy:
    connectionPool:
      http:
        http2MaxRequests: 10000
        maxRequestsPerConnection: 10000
---

================================================
FILE: examples/90_Kubernetes/minikube/README.md
================================================
# Development with Minikube

## Install `minikube`, `kubectl`, and `helm`

This only needs to be done once, or periodically if you wish to upgrade.

```
sudo apt update && sudo apt install -y --no-install-recommends socat
curl -Lo minikube https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64 && chmod +x minikube && sudo mv minikube /usr/local/bin/
curl -Lo kubectl https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl && chmod +x kubectl && sudo mv kubectl /usr/local/bin
curl https://raw.githubusercontent.com/kubernetes/helm/master/scripts/get > get_helm.sh
chmod 700 get_helm.sh
./get_helm.sh
```

Add the `coreos/prometheus-operator` repo:

```
helm repo add coreos https://s3-eu-west-1.amazonaws.com/coreos-charts/stable/
```

## Launch a Kubernetes Cluster

```
./bootstrap.sh
```

Check configurations:

```
kubectl get all
kubectl get all --all-namespaces
kubectl get nodes -o=custom-columns=NAME:.metadata.name,GPUs:.status.capacity.'nvidia\.com/gpu'
# last command should report the number of GPUs on your system
# this may take some time - coffee?!
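# optional sanity check (assumption: the device plugin pod created by
# bootstrap.sh has "nvidia-device-plugin" in its name):
kubectl get pods -n kube-system | grep nvidia-device-plugin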
``` ================================================ FILE: examples/90_Kubernetes/minikube/bootstrap.sh ================================================ #!/bin/bash mkdir -p $HOME/.kube touch $HOME/.kube/config export MINIKUBE_HOME=$HOME export CHANGE_MINIKUBE_NONE_USER=true export KUBECONFIG=$HOME/.kube/config version=v1.10 sudo minikube start \ --feature-gates=DevicePlugins=true \ --vm-driver=none \ --kubernetes-version=${version}.0 \ --bootstrapper=kubeadm \ --extra-config=kubelet.authentication-token-webhook=true \ --extra-config=kubelet.authorization-mode=Webhook \ --extra-config=scheduler.address=0.0.0.0 \ --extra-config=controller-manager.address=0.0.0.0 \ --extra-config=controller-manager.cluster-signing-cert-file="/var/lib/localkube/certs/ca.crt" \ --extra-config=controller-manager.cluster-signing-key-file="/var/lib/localkube/certs/ca.key" \ --extra-config=apiserver.admission-control="NamespaceLifecycle,LimitRanger,ServiceAccount,PersistentVolumeLabel,DefaultStorageClass,DefaultTolerationSeconds,MutatingAdmissionWebhook,ValidatingAdmissionWebhook,ResourceQuota" if [ ! -e $HOME/.kube ]; then sudo mv /root/.kube $HOME/.kube > /dev/null 2>&1 ||: # this will write over any previous configuration sudo chown -R $USER $HOME/.kube > /dev/null 2>&1 ||: sudo chgrp -R $USER $HOME/.kube > /dev/null 2>&1 ||: fi if [ ! -e $HOME/.minikube ]; then sudo mv /root/.minikube $HOME/.minikube # > /dev/null 2>&1 ||: this will write over any previous configuration sudo chown -R $USER $HOME/.minikube > /dev/null 2>&1 ||: sudo chgrp -R $USER $HOME/.minikube > /dev/null 2>&1 ||: fi kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/${version}/nvidia-device-plugin.yml # dns fix for dgx-stations using ubuntu's network manager kubectl apply -f https://raw.githubusercontent.com/ryanolson/k8s-upstream-dns/master/dns.yml ================================================ FILE: examples/90_Kubernetes/prometheus/bootstrap.sh ================================================ #!/bin/bash kubectl create -f service-account.yml helm init --wait --service-account tiller helm repo add coreos https://s3-eu-west-1.amazonaws.com/coreos-charts/stable/ helm install coreos/prometheus-operator \ --name prometheus-operator \ --namespace monitoring helm install coreos/kube-prometheus \ --name kube-prometheus \ --namespace monitoring \ -f custom-settings.yml kubectl apply -f yais-metrics.yml ================================================ FILE: examples/90_Kubernetes/prometheus/custom-settings.yml ================================================ global: rbacEnable: true #prometheus: # service: # type: NodePort grafana: # image: # tag: 5.2.1 service: type: NodePort dataSource: yais-datasource.json: |+ { "access": "proxy", "basicAuth": false, "name": "yais", "type": "prometheus", "url": "http://yais-metrics.default:9090" } serverDashboardFiles: yais-dashboard.json: |+ { "dashboard": { "__inputs": [ { "name": "DS_YAIS", "label": "yais", "description": "", "type": "datasource", "pluginId": "prometheus", "pluginName": "Prometheus" } ], "__requires": [ { "type": "grafana", "id": "grafana", "name": "Grafana", "version": "5.0.0" }, { "type": "panel", "id": "graph", "name": "Graph", "version": "5.0.0" }, { "type": "panel", "id": "heatmap", "name": "Heatmap", "version": "5.0.0" }, { "type": "datasource", "id": "prometheus", "name": "Prometheus", "version": "5.0.0" } ], "annotations": { "list": [ { "builtIn": 1, "datasource": "-- Grafana --", "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": 
"Annotations & Alerts", "type": "dashboard" } ] }, "editable": true, "gnetId": null, "graphTooltip": 0, "id": null, "links": [], "panels": [ { "aliasColors": {}, "bars": true, "dashLength": 10, "dashes": false, "datasource": "${DS_YAIS}", "fill": 1, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 0 }, "id": 8, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "irate(yais_inference_load_ratio_count[5s])", "format": "time_series", "intervalFactor": 1, "refId": "A" }, { "expr": "", "format": "time_series", "intervalFactor": 1, "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Inference Rate (FPS)", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_YAIS}", "fill": 1, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 0 }, "id": 4, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": true, "steppedLine": true, "targets": [ { "expr": "yais_gpus_power_usage", "format": "time_series", "intervalFactor": 1, "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "GPU Power (Watts)", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_YAIS}", "fill": 1, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 9 }, "id": 2, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "yais_executor_queue_depth", "format": "time_series", "intervalFactor": 1, "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Queue Depth", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": 
null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "cards": { "cardPadding": null, "cardRound": null }, "color": { "cardColor": "#b4ff00", "colorScale": "sqrt", "colorScheme": "interpolateOranges", "exponent": 0.5, "mode": "spectrum" }, "dataFormat": "timeseries", "datasource": "${DS_YAIS}", "gridPos": { "h": 9, "w": 12, "x": 12, "y": 9 }, "heatmap": {}, "highlightCards": true, "id": 10, "interval": "", "legend": { "show": false }, "links": [], "targets": [ { "expr": "irate(yais_inference_load_ratio_bucket[15s])", "format": "time_series", "intervalFactor": 1, "legendFormat": "", "refId": "A" } ], "title": "Load Ratio (Ideal 0-1)", "tooltip": { "show": true, "showHistogram": false }, "type": "heatmap", "xAxis": { "show": true }, "xBucketNumber": null, "xBucketSize": null, "yAxis": { "decimals": null, "format": "short", "logBase": 10, "max": null, "min": "0", "show": true, "splitFactor": 2 }, "yBucketNumber": null, "yBucketSize": null } ], "refresh": "5s", "schemaVersion": 16, "style": "dark", "tags": [], "templating": { "list": [] }, "time": { "from": "now-30m", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "", "title": "YAIS", "uid": "3WLjQkdmk", "version": 1 }, "inputs": [ { "name": "DS_YAIS", "pluginId": "prometheus", "type": "datasource", "value": "yais" } ], "overwrite": true } ================================================ FILE: examples/90_Kubernetes/prometheus/service-account.yml ================================================ # Create a service account for Helm and grant the cluster admin role. # It is assumed that helm should be installed with this service account # (tiller). 
apiVersion: v1 kind: ServiceAccount metadata: name: tiller namespace: kube-system --- apiVersion: rbac.authorization.k8s.io/v1beta1 kind: ClusterRoleBinding metadata: name: tiller roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole name: cluster-admin subjects: - kind: ServiceAccount name: tiller namespace: kube-system ================================================ FILE: examples/90_Kubernetes/prometheus/yais-dashboard.json ================================================ { "__inputs": [ { "name": "DS_YAIS", "label": "yais", "description": "", "type": "datasource", "pluginId": "prometheus", "pluginName": "Prometheus" } ], "__requires": [ { "type": "grafana", "id": "grafana", "name": "Grafana", "version": "5.2.1" }, { "type": "panel", "id": "graph", "name": "Graph", "version": "5.0.0" }, { "type": "datasource", "id": "prometheus", "name": "Prometheus", "version": "5.0.0" } ], "annotations": { "list": [ { "builtIn": 1, "datasource": "-- Grafana --", "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "type": "dashboard" } ] }, "editable": true, "gnetId": null, "graphTooltip": 0, "id": null, "links": [], "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_YAIS}", "fill": 1, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 0 }, "id": 8, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "rate(yais_inference_load_ratio_count[5s])", "format": "time_series", "intervalFactor": 1, "refId": "A" }, { "expr": "irate(yais_inference_load_ratio_count[5s])", "format": "time_series", "intervalFactor": 1, "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Inference Rate (FPS)", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_YAIS}", "fill": 1, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 0 }, "id": 4, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "yais_gpus_power_usage", "format": "time_series", "intervalFactor": 1, "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "GPU Power (Watts)", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], 
"yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_YAIS}", "fill": 1, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 9 }, "id": 2, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "yais_executor_queue_depth", "format": "time_series", "intervalFactor": 1, "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, "title": "Queue Depth", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } } ], "schemaVersion": 16, "style": "dark", "tags": [], "templating": { "list": [] }, "time": { "from": "now-15m", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "", "title": "YAIS", "uid": "3WLjQkdmk", "version": 1 } ================================================ FILE: examples/90_Kubernetes/prometheus/yais-metrics.yml ================================================ # # Create a Service Account, Role, Role Binding # YAIS Specific Prometheus (via Operator) and Service # --- apiVersion: v1 kind: ServiceAccount metadata: name: prometheus --- apiVersion: rbac.authorization.k8s.io/v1beta1 kind: ClusterRole metadata: name: prometheus rules: - apiGroups: [""] resources: - nodes - services - endpoints - pods verbs: ["get", "list", "watch"] - apiGroups: [""] resources: - configmaps verbs: ["get"] - nonResourceURLs: ["/metrics"] verbs: ["get"] --- apiVersion: rbac.authorization.k8s.io/v1beta1 kind: ClusterRoleBinding metadata: name: prometheus roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole name: prometheus subjects: - kind: ServiceAccount name: prometheus namespace: default --- apiVersion: monitoring.coreos.com/v1 kind: Prometheus metadata: name: yais-metrics spec: serviceAccountName: prometheus serviceMonitorSelector: matchLabels: scrape: yais resources: requests: memory: 400Mi --- apiVersion: v1 kind: Service metadata: name: yais-metrics spec: ports: - name: web port: 9090 selector: prometheus: yais-metrics ================================================ FILE: examples/90_Kubernetes/yais-deploy.yml ================================================ --- apiVersion: apps/v1 kind: Deployment metadata: name: yais-example spec: replicas: 1 selector: matchLabels: app: yais-example template: metadata: labels: app: yais-example annotations: sidecar.istio.io/inject: "true" spec: containers: - name: yais-example image: yais command: ["/work/examples/90_Kubernetes/deploy/build-and-run.sh"] imagePullPolicy: IfNotPresent # Always env: - name: YAIS_CONCURRENCY value: "8" resources: limits: nvidia.com/gpu: 1 ports: - name: grpc containerPort: 50051 - name: metrics containerPort: 50078 livenessProbe: tcpSocket: port: 50051 initialDelaySeconds: 5 periodSeconds: 5 readinessProbe: 
tcpSocket:
  port: 50051
initialDelaySeconds: 5
periodSeconds: 5
---
apiVersion: v1
kind: Service
metadata:
  name: yais-example
  labels:
    app: yais-example
spec:
  selector:
    app: yais-example
  ports:
  - name: grpc
    port: 50051
    targetPort: grpc
  - name: metrics
    port: 50078
    targetPort: metrics
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: yais-example
  labels:
    scrape: yais
spec:
  selector:
    matchLabels:
      app: yais-example
  endpoints:
  - port: metrics
    interval: 2s
    honorLabels: true
---
apiVersion: networking.istio.io/v1alpha3
kind: Gateway
metadata:
  name: yais-gateway
spec:
  selector:
    istio: ingressgateway # use istio default controller
  servers:
  - hosts:
    - "*"
    port:
      name: grpc
      number: 80
      protocol: grpc
---
apiVersion: networking.istio.io/v1alpha3
kind: VirtualService
metadata:
  name: yais-virtual-service
spec:
  hosts:
  - "*"
  gateways:
  - yais-gateway
  http:
  - match:
    - uri:
        prefix: /
    route:
    - destination:
        host: yais-example.default.svc.cluster.local
        port:
          number: 50051
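# A quick smoke test of the deployment above, assuming kubectl access to the
# cluster and the client binaries built under /work/build (paths as used
# throughout these examples):
#
#   kubectl port-forward svc/yais-example 50051:50051 &
#   /work/build/examples/02_TensorRT_GRPC/client-sync.x --port=50051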

================================================
FILE: examples/91_Prometheus/README.md
================================================
# Prometheus

WIP


================================================
FILE: examples/91_Prometheus/scrape.conf
================================================
[[inputs.prometheus]]
  urls = ["http://localhost:50078/metrics"]

[[outputs.file]]
  files = ["stdout"]


================================================
FILE: examples/97_SingleProcessMultiSteam/launch_service.sh
================================================
#!/bin/bash -e
#
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

cleanup() {
    kill $(jobs -p) ||:
}
trap "cleanup" EXIT SIGINT SIGTERM

ENG=${1:-/work/models/ResNet-50-b1-fp32.engine}
NCTX=${2:-1}

if [ ! -e $ENG ]; then
    echo "$ENG not found"
    exit 911
fi

port=50051
/work/build/examples/02_TensorRT_GRPC/inference-grpc.x --port=$port --engine=${ENG} --contexts=$NCTX &
wait-for-it.sh localhost:$port --timeout=0 -- echo "YAIS Service is ready." > /dev/null 2>&1

echo "warmup with client-async.x"
/work/build/examples/02_TensorRT_GRPC/client-async.x --count=1000 --port=$port

echo
echo "Starting a shell keeping the services and load-balancer running..."
echo "Try /work/build/examples/02_TensorRT_GRPC/siege.x --rate=2000 --port=$port"
bash --rcfile <(echo "PS1='$NCTX x $ENG Subshell: '")


================================================
FILE: examples/98_MultiProcessSingleStream/README.md
================================================
# MPS Examples

`run_throughput_test ncopies batch_size engine_file MPS/NOMPS`

V100 - 16GB - DGX-1V

Processes | MPS | FPS | Batch | Model
--------- | --- | --- | ----- | -----
1         | N   | 383 | 1     | RN50
8         | N   | 365 | 1     | RN50
8         | Y   | 929 | 1     | RN50

```
root@dgx11:/work/src/Examples/98_MultiProcessSingleStream# ./run_throughput_test 8 1 /work/models/ResNet-50-b1-fp32.engine MPS
starting 8 inference services
starting load balancer
load balancing over ports: ['50051', '50052', '50053', '50054', '50055', '50056', '50057', '50058']
running test client
1000 requests in 1.07632seconds; inf/sec: 929.095

root@dgx11:/work/src/Examples/98_MultiProcessSingleStream# ./run_throughput_test 8 1 /work/models/ResNet-50-b1-fp32.engine NOMPS
starting 8 inference services
starting load balancer
load balancing over ports: ['50051', '50052', '50053', '50054', '50055', '50056', '50057', '50058']
running test client
1000 requests in 2.74228seconds; inf/sec: 364.66

root@dgx11:/work/src/Examples/98_MultiProcessSingleStream# ./run_throughput_test 1 1 /work/models/ResNet-50-b1-fp32.engine NOMPS
starting 1 inference services
starting load balancer
load balancing over ports: ['50051']
running test client
1000 requests in 2.60915seconds; inf/sec: 383.267
```
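
The companion `run_latency_test` script below takes a batch size and an engine path, defaulting to `/work/models/ResNet-50-b2-int8.engine`. A typical pair of runs, assuming the engines have already been built under `/work/models` (the paths here are the scripts' defaults, not guaranteed to exist on your system):

```
# throughput: 8 service copies behind the envoy load balancer, with MPS enabled
./run_throughput_test 8 1 /work/models/ResNet-50-b1-fp32.engine MPS

# single-stream latency against one service
./run_latency_test 1 /work/models/ResNet-50-b2-int8.engine
```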
-e $ENG ]; then echo "$ENG not found" exit 911 fi echo "starting inference service" port=50100 /work/build/examples/02_TensorRT_GRPC/inference-grpc.x --port $port --engine=${ENG} & # > /dev/null 2>&1 & wait-for-it.sh localhost:$port --timeout=0 -- echo "Server is ready." > /dev/null 2>&1 echo "running latency client" /work/build/examples/02_TensorRT_GRPC/client-sync.x --port=$port ================================================ FILE: examples/98_MultiProcessSingleStream/run_throughput_test ================================================ #!/bin/bash -e # # Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of NVIDIA CORPORATION nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # cleanup() { kill $(jobs -p) ||: echo quit | nvidia-cuda-mps-control > /dev/null 2>&1 ||: } trap "cleanup" EXIT SIGINT SIGTERM NCTX=${1:-1} BS=${2:-1} ENG=${3:-/work/models/ResNet-50-b1-fp32.engine} MPS=${4:-"MPS"} if [ ! -e $ENG ]; then echo "$ENG not found" exit 911 fi if [ "$MPS" = "MPS" ]; then nvidia-cuda-mps-control -d ||: fi sleep 1 echo "starting $NCTX inference services" for i in $(seq 1 $NCTX); do port=$(echo "50050 + $i" | bc) /work/build/examples/02_TensorRT_GRPC/inference-grpc.x --port $port --engine=${ENG} > /dev/null 2>&1 & wait-for-it.sh localhost:$port --timeout=0 -- echo "Server ${i} is ready." > /dev/null 2>&1 done echo "starting load balancer" ../99_LoadBalancer/run_loadbalancer.py -n $NCTX # envoy -c /tmp/lb-envoy.yaml --disable-hot-restart > /dev/null 2>&1 & envoy -c /tmp/lb-envoy.yaml > /dev/null 2>&1 & wait-for-it.sh localhost:50050 --timeout=0 -- echo "Load balancer is ready." > /dev/null 2>&1 echo "running test client" /work/build/examples/02_TensorRT_GRPC/client-async.x --count=1000 --port=50050 echo echo "Starting a shell keeping the services and load-balancer running..."
echo "Try /work/build/examples/02_TensorRT_GRPC/siege.x --rate=2000 --port=50050" bash --rcfile <(echo "PS1='Throughput Subshell: '") ================================================ FILE: examples/98_MultiProcessSingleStream/setup.py ================================================ #!/usr/bin/env python3 # # Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of NVIDIA CORPORATION nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # import os import subprocess models = [ ("ResNet-50-deploy.prototxt", "prob"), # ("ResNet-152-deploy.prototxt", "prob"), ] precisions = [ ("fp32", ""), ("fp16", "--fp16"), ("int8", "--int8") ] def main(): for model, o in models: for name, p in precisions: for b in [1]: #, 2, 4, 8]: n = "b{}-{}".format(b, name) e = model.replace("prototxt", "engine") e = e.replace("deploy", n) m = os.path.join("/work/models", model) if os.path.isfile(e): continue subprocess.call("giexec --deploy={} --batch={} --output={} {} --engine={}".format( m, b, o, p, e ), shell=True) if __name__ == "__main__": main() ================================================ FILE: examples/99_LoadBalancer/README.md ================================================ # Envoy Load Balancer Very basic Envoy Proxy L7 load balancer for testing purposes. `run_loadbalancer.py -n ` will start a copy of envoy listening on port `50050` and load-balancing over ports `[50051, 50051+n-1]`. You are responsible for spinning up the backend services. ## Notes The load-balancer overhead appears to be about 150us. Running the `client-sync.x` directly to a backend vs. through the load-balancer shows about 150us overhead per transaction. ``` # direct Throughput Subshell: /work/build/examples/02_TensorRT_GRPC/client-sync.x --port 50051 1000 requests in 2.69029 seconds; inf/sec: 371.707 # proxied via envoy load-balancer Throughput Subshell: /work/build/examples/02_TensorRT_GRPC/client-sync.x --port 50050 1000 requests in 2.8411 seconds; inf/sec: 351.977 ``` ================================================ FILE: examples/99_LoadBalancer/lb-envoy.j2 ================================================ {#- # Copyright (c) 2018-2019, NVIDIA CORPORATION. 
All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of NVIDIA CORPORATION nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # -#} static_resources: listeners: - address: socket_address: address: 0.0.0.0 port_value: 50050 filter_chains: - filters: - name: envoy.http_connection_manager config: codec_type: auto stat_prefix: ingress_http route_config: name: local_route virtual_hosts: - name: backend domains: - "*" routes: - match: prefix: "/" headers: - name: content-type value: application/grpc route: cluster: inference http_filters: - name: envoy.router config: {} clusters: - name: inference connect_timeout: 0.25s type: strict_dns lb_policy: round_robin http2_protocol_options: {} hosts: {% for port in ports %} - socket_address: address: 127.0.0.1 port_value: {{ port }} {% endfor %} admin: access_log_path: "/dev/null" address: socket_address: address: 0.0.0.0 port_value: 8001 ================================================ FILE: examples/99_LoadBalancer/run_loadbalancer.py ================================================ #!/usr/bin/env python3 # # Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of NVIDIA CORPORATION nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # import os import inspect import shutil import tempfile import click from jinja2 import Environment, FileSystemLoader, Template def render(template_path, data=None, extensions=None, strict=False): data = data or {} extensions = extensions or [] env = Environment( loader=FileSystemLoader(os.path.dirname(template_path)), extensions=extensions, keep_trailing_newline=True, ) if strict: from jinja2 import StrictUndefined env.undefined = StrictUndefined # Add environ global env.globals['environ'] = os.environ.get return env.get_template(os.path.basename(template_path)).render(data) script_path = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) FileType = click.Path(exists=True, file_okay=True, dir_okay=False, resolve_path=True) @click.command() @click.option("-n", default=1) @click.option("--template", type=FileType, default=os.path.join(script_path, "lb-envoy.j2")) def main(n, template): envoy = shutil.which("envoy") if envoy is None or not os.path.isfile(envoy): raise RuntimeError("envoy executable not found on PATH: {}".format(envoy)) ports = [50051 + p for p in range(n)] print("load balancing over ports: ", [str(p) for p in ports]) with open("/tmp/lb-envoy.yaml", "w") as file: file.write(render(template, data={"ports": ports})) # os.system("{} -c /tmp/lb-envoy.yaml".format(envoy)) if __name__ == "__main__": main() ================================================ FILE: examples/CMakeLists.txt ================================================ # Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of NVIDIA CORPORATION nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
find_package(gflags 2.2.1 REQUIRED) add_subdirectory(nvRPC) add_subdirectory(Deployment) add_subdirectory(00_TensorRT) add_subdirectory(01_Basic_GRPC) add_subdirectory(02_TensorRT_GRPC) #add_subdirectory(03_Batching) add_subdirectory(04_Middleman) add_subdirectory(10_Internals) add_subdirectory(11_Protos) add_subdirectory(12_FlatBuffers) #add_subdirectory(30_PyTensorRT) ================================================ FILE: examples/Deployment/CMakeLists.txt ================================================ add_subdirectory(ImageClient) add_subdirectory(RouteRequests) ================================================ FILE: examples/Deployment/ImageClient/CMakeLists.txt ================================================ set(protobuf_MODULE_COMPATIBLE TRUE) find_package(Protobuf CONFIG REQUIRED) message(STATUS "Using protobuf ${protobuf_VERSION}") set(_PROTOBUF_LIBPROTOBUF protobuf::libprotobuf) set(_PROTOBUF_PROTOC $<TARGET_FILE:protobuf::protoc>) INCLUDE(GRPCGenerateCPP) PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS api.proto ) PROTOBUF_GENERATE_GRPC_CPP(PROTO_GRPC_SRCS PROTO_GRPC_HDRS api.proto ) add_library(deploy-image-client-protos ${PROTO_SRCS} ${PROTO_GRPC_SRCS} ) target_link_libraries(deploy-image-client-protos PUBLIC ${_PROTOBUF_LIBPROTOBUF} ) target_include_directories(deploy-image-client-protos PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ) pybind11_add_module(deploy_image_client client.cc ) target_link_libraries(deploy_image_client PUBLIC nvrpc-client deploy-image-client-protos ) ================================================ FILE: examples/Deployment/ImageClient/api.proto ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ syntax = "proto3"; package trtlab.deploy.image_client; service Inference { rpc Classify (ImageInfo) returns (Classifications) {} rpc Detection (ImageInfo) returns (Detections) {} rpc ClassifyStream (stream ImageInfo) returns (stream Classifications) {} rpc DetectionSream (stream ImageInfo) returns (stream Detections) {} } message ImageInfo { string api_key = 1; string image_uuid = 2; string model_name = 3; } message Classifications { string image_uuid = 1; repeated Score scores =2; Details details = 3; } message Detections { string image_uuid = 1; repeated Object objects = 2; Details details = 3; } message Score { int32 class_idx = 1; float score = 2; } message Object { string class_name = 1; float upper_left_x = 2; // [0, 1] normalized on width float upper_left_y = 3; // [0, 1] normalized on height float width = 4; // [0, 1] normalized on width float height = 5; // [0, 1] normalized on height } // this is where you can customize what details your service // returns to the user message Details { float request_time = 1; float compute_time = 2; } ================================================ FILE: examples/Deployment/ImageClient/client.cc ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include <map> #include <memory> #include <pybind11/pybind11.h> /* bracketed includes reconstructed; the originals were lost in extraction */ #include "client.h" using namespace trtlab; using namespace nvrpc; namespace py = pybind11; using deploy::image_client::Classifications; using deploy::image_client::Detections; using deploy::image_client::ImageInfo; using deploy::image_client::Inference; ImageClient::ImageClient(std::string hostname) { auto executor = std::make_shared<client::Executor>(1); auto channel = grpc::CreateChannel(hostname, grpc::InsecureChannelCredentials()); std::shared_ptr<Inference::Stub> stub = Inference::NewStub(channel); auto classify_prepare_fn = [stub](::grpc::ClientContext * context, const ImageInfo& request, ::grpc::CompletionQueue* cq) -> auto { return std::move(stub->PrepareAsyncClassify(context, request, cq)); }; auto detection_prepare_fn = [stub](::grpc::ClientContext * context, const ImageInfo& request, ::grpc::CompletionQueue* cq) -> auto { return std::move(stub->PrepareAsyncDetection(context, request, cq)); }; m_ClassifyClient = std::make_unique<client::ClientUnary<ImageInfo, Classifications>>( classify_prepare_fn, executor); m_DetectionClient = std::make_unique<client::ClientUnary<ImageInfo, Detections>>( detection_prepare_fn, executor); } std::shared_future<ClassifyResult> ImageClient::Classify(const std::string& model_name, const std::string& image_uuid) { ImageInfo image_info; image_info.set_model_name(model_name); image_info.set_image_uuid(image_uuid); std::map<std::string, std::string> headers = {{"custom-metadata-model-name", model_name}}; auto post = [](ImageInfo& input, Classifications& output, ::grpc::Status& status) -> ClassifyResult { ClassifyResult results(output); return std::move(results); }; return m_ClassifyClient->Enqueue(std::move(image_info), post, headers); } std::shared_future<DetectionResult> ImageClient::Detection(const std::string& model_name, const std::string& image_uuid) { ImageInfo image_info; image_info.set_model_name(model_name); image_info.set_image_uuid(image_uuid); std::map<std::string, std::string> headers = {{"custom-metadata-model-name", model_name}}; auto post = [](ImageInfo& input, Detections& output, ::grpc::Status& status) -> DetectionResult { DetectionResult results(output); return std::move(results); }; return m_DetectionClient->Enqueue(std::move(image_info), post, headers); } ClassifyResult::ClassifyResult(const ::trtlab::deploy::image_client::Classifications& pb) : m_UUID(pb.image_uuid()) { } DetectionResult::DetectionResult(const ::trtlab::deploy::image_client::Detections& pb) : m_UUID(pb.image_uuid()) { } using PyClassifyFuture = std::shared_future<ClassifyResult>; using PyDetectionFuture = std::shared_future<DetectionResult>; PYBIND11_MAKE_OPAQUE(PyClassifyFuture); PYBIND11_MAKE_OPAQUE(PyDetectionFuture); PYBIND11_MODULE(deploy_image_client, m) { py::class_<ImageClient, std::shared_ptr<ImageClient>>(m, "ImageClient") .def(py::init<std::string>(), py::arg("hostname") = "trt.lab") .def("classify", &ImageClient::Classify) .def("detection", &ImageClient::Detection); py::class_<PyClassifyFuture, std::shared_ptr<PyClassifyFuture>>(m, "ClassifyFuture") .def("wait", &PyClassifyFuture::wait, py::call_guard<py::gil_scoped_release>()) .def("get", &PyClassifyFuture::get, py::call_guard<py::gil_scoped_release>()); py::class_<PyDetectionFuture, std::shared_ptr<PyDetectionFuture>>(m, "DetectionFuture") .def("wait", &PyDetectionFuture::wait, py::call_guard<py::gil_scoped_release>()) .def("get", &PyDetectionFuture::get, py::call_guard<py::gil_scoped_release>()); py::class_<ClassifyResult, std::shared_ptr<ClassifyResult>>(m, "ClassifyResult") .def_property_readonly("uuid", &ClassifyResult::UUID); py::class_<DetectionResult, std::shared_ptr<DetectionResult>>(m, "DetectionResult") .def_property_readonly("uuid", &DetectionResult::UUID); } ================================================ FILE: examples/Deployment/ImageClient/client.h ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #pragma once #include <future> #include <memory> #include <string> /* bracketed includes reconstructed; the originals were lost in extraction */ #include "nvrpc/client/client_unary.h" #include "api.grpc.pb.h" #include "api.pb.h" class ClassifyResult { public: ClassifyResult(const ::trtlab::deploy::image_client::Classifications&); const std::string& UUID() const { return m_UUID; } private: std::string m_UUID; }; struct DetectionResult { public: DetectionResult(const ::trtlab::deploy::image_client::Detections&); const std::string& UUID() const { return m_UUID; } private: std::string m_UUID; }; class ImageClient { public: ImageClient(std::string); ~ImageClient() {} std::shared_future<ClassifyResult> Classify(const std::string&, const std::string&); std::shared_future<DetectionResult> Detection(const std::string&, const std::string&); private: using ImageInfo = ::trtlab::deploy::image_client::ImageInfo; using Classifications = ::trtlab::deploy::image_client::Classifications; using Detections = ::trtlab::deploy::image_client::Detections; std::unique_ptr<::nvrpc::client::ClientUnary<ImageInfo, Classifications>> m_ClassifyClient; std::unique_ptr<::nvrpc::client::ClientUnary<ImageInfo, Detections>> m_DetectionClient; }; ================================================ FILE: examples/Deployment/ImageClient/client.py ================================================ ## Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. ## ## Redistribution and use in source and binary forms, with or without ## modification, are permitted provided that the following conditions ## are met: ## * Redistributions of source code must retain the above copyright ## notice, this list of conditions and the following disclaimer. ## * Redistributions in binary form must reproduce the above copyright ## notice, this list of conditions and the following disclaimer in the ## documentation and/or other materials provided with the distribution. ## * Neither the name of NVIDIA CORPORATION nor the names of its ## contributors may be used to endorse or promote products derived ## from this software without specific prior written permission.
## ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY ## EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ## IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR ## PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR ## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, ## EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, ## PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR ## PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY ## OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ## (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ## OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE import os import uuid import boto3 import deploy_image_client as client class ImageClient: def __init__(self, *, hostname = "trt.lab"): self._cpp_client = client.ImageClient(hostname) self._s3_client = self._get_s3_client() def classify(self, image_path, model): key = self._upload_to_s3(image_path) return self._cpp_client.classify(model, key) def object_detection(self, image_path, model): key = self._upload_to_s3(image_path) return self._cpp_client.detection(model, key) def _get_s3_client(self): kwargs = {} if os.environ.get("AWS_ENDPOINT_URL"): kwargs = { "endpoint_url": os.environ.get("AWS_ENDPOINT_URL"), "use_ssl": False, "verify": False, } return boto3.client("s3", **kwargs) def _check_if_file(self, file_path): if not os.path.isfile(file_path): raise RuntimeError("{} is not a file".format(file_path)) def _upload_to_s3(self, image_path): self._check_if_file(image_path) key = str(uuid.uuid4()) with open(image_path, "rb") as data: self._s3_client.upload_fileobj(data, 'images', key) return key ================================================ FILE: examples/Deployment/Kubernetes/basic-trtis-deployment/deploy.yml ================================================ --- apiVersion: apps/v1 kind: Deployment metadata: name: basic-trtis-deployment namespace: trtlab spec: replicas: 1 selector: matchLabels: app: basic-trtis-deployment template: metadata: labels: app: basic-trtis-deployment annotations: sidecar.istio.io/inject: "true" spec: containers: - name: trtis image: nvcr.io/nvidia/tensorrtserver:19.02-py3 command: ["trtserver", "--model-store=/tmp/models"] imagePullPolicy: IfNotPresent resources: limits: nvidia.com/gpu: 1 ports: - name: http containerPort: 8000 - name: grpc containerPort: 8001 - name: metrics containerPort: 8002 livenessProbe: httpGet: path: /api/health/live port: http initialDelaySeconds: 10 periodSeconds: 5 readinessProbe: httpGet: path: /api/health/ready port: http initialDelaySeconds: 10 periodSeconds: 5 volumeMounts: - mountPath: /tmp/models name: model-store volumes: - name: model-store hostPath: path: /shared/trtis/example-model-store type: Directory --- apiVersion: v1 kind: Service metadata: name: basic-trtis-deployment namespace: trtlab labels: app: basic-trtis-deployment spec: selector: app: basic-trtis-deployment ports: - name: http port: 8000 targetPort: http - name: grpc port: 8001 targetPort: grpc - name: metrics port: 8002 targetPort: metrics ================================================ FILE: examples/Deployment/Kubernetes/basic-trtis-deployment/istio-ingress.yml ================================================ --- apiVersion: networking.istio.io/v1alpha3 kind: Gateway metadata: name: basic-trtis-deployment-gateway namespace: trtlab spec: selector: istio: ingressgateway servers: - port: name: http
number: 80 protocol: grpc hosts: - "trt.lab" --- apiVersion: networking.istio.io/v1alpha3 kind: VirtualService metadata: name: basic-trtis-deployment-virtual-service namespace: trtlab spec: hosts: - "trt.lab" gateways: - basic-trtis-deployment-gateway http: - match: - uri: prefix: /api/health/ route: - destination: host: basic-trtis-deployment port: number: 8000 - match: - uri: prefix: / route: - destination: host: basic-trtis-deployment port: number: 8001 --- apiVersion: networking.istio.io/v1alpha3 kind: DestinationRule metadata: name: basic-trtis-deployment-load-balancer namespace: trtlab spec: host: basic-trtis-deployment trafficPolicy: loadBalancer: simple: LEAST_CONN ================================================ FILE: examples/Deployment/Kubernetes/basic-trtis-deployment/scrape-metrics.yml ================================================ --- apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: basic-trtis-deployment namespace: trtlab labels: scrape: nv-inference-metrics spec: selector: matchLabels: app: basic-trtis-deployment endpoints: - port: metrics interval: 2s honorLabels: true ================================================ FILE: examples/Deployment/ObjectStore/README.md ================================================ # Object Store In the Image Service example, the ImageClient separates an inference request into two components: - a bulk data transfer to a backend store, - a gRPC request that contains the details of the request (model, file_handle, etc.) To implement this concept, we will use an S3-compatible Object Store. The example should work equally well on AWS S3 or via Rook's S3 CephObjectStore implementation running locally on a Kubernetes cluster. For more details on how Kubernetes and Rook were installed, see the [NVIDIA DeepOps Project](https://github.com/nvidia/deepops). This folder contains some basic configuration files and scripts for preparing the ObjectStore for our Image Service. ## AWS S3 Simply set your AWS API configuration or export the following environment variables. ``` export AWS_ACCESS_KEY_ID= export AWS_SECRET_ACCESS_KEY= ``` ## Rook + Kubernetes You will need to modify some of the configuration files for your cluster. - `rook-s3.yml` options: - requires 3 unique hosts with bluestore backed OSDs - creates a `trtlab` user If you modify the name of the ObjectStore (`trtlab-s3`) and/or the username (`trtlab`), be aware that `get_rook_s3_keys.sh` needs to be modified. Similarly, the example uses `s3.trt.lab` as the endpoint on which the storage is hosted. If you change this, you will need to modify `get_rook_s3_keys.sh` to output the proper `AWS_ENDPOINT_URL`. You will also need to modify the ingress examples with the proper hostname. ``` kubectl apply -f rook-s3.yml kubectl apply -f ingress-nginx.yml ``` ### Setup your environment ``` eval $(./get_rook_s3_keys.sh) ``` ### Prepare your Image bucket Note: you will need to have python3 and boto3 installed. This does not have to be done inside the container.
``` python3 create_buckets.py ``` ## TODOs - [ ] Export S3 keys to a Kubernetes Secret - [ ] Scripts for bucket maintenance: probably some k8s CronJobs - [ ] Update Istio ingress example ================================================ FILE: examples/Deployment/ObjectStore/create_buckets.py ================================================ import os import boto3 s3 = boto3.client("s3", use_ssl=False, verify=False, endpoint_url=os.environ.get("AWS_ENDPOINT_URL")) response = s3.list_buckets() buckets = [b["Name"] for b in response["Buckets"]] if "images" not in buckets: s3.create_bucket(Bucket="images") response = s3.list_buckets() buckets = [b["Name"] for b in response["Buckets"]] print(buckets) ================================================ FILE: examples/Deployment/ObjectStore/get_rook_s3_keys.sh ================================================ #!/bin/bash objstore=trtlab-s3 user=trtlab echo -n export AWS_ACCESS_KEY_ID= kubectl -n rook-ceph get secret rook-ceph-object-user-${objstore}-${user} -o yaml | grep AccessKey | awk '{print $2}' | base64 --decode echo echo -n export AWS_SECRET_ACCESS_KEY= kubectl -n rook-ceph get secret rook-ceph-object-user-${objstore}-${user} -o yaml | grep SecretKey | awk '{print $2}' | base64 --decode echo echo export AWS_ENDPOINT_URL=http://s3.trt.lab echo ================================================ FILE: examples/Deployment/ObjectStore/ingress-istio.yml ================================================ # not working yet --- apiVersion: networking.istio.io/v1alpha3 kind: Gateway metadata: name: trtlab-s3-gateway spec: selector: app: trtlab-s3 istio: ingressgateway servers: - hosts: - "s3.trt.lab" port: number: 80 name: http protocol: HTTP --- apiVersion: networking.istio.io/v1alpha3 kind: VirtualService metadata: name: trtlab-s3-virtualservice spec: hosts: - "s3.trt.lab" gateways: - trtlab-s3-gateway http: - match: - uri: prefix: / route: - destination: host: rook-ceph-rgw-trtlab-s3.rook-ceph port: number: 80 ================================================ FILE: examples/Deployment/ObjectStore/ingress-nginx.yml ================================================ apiVersion: extensions/v1beta1 kind: Ingress metadata: annotations: nginx.ingress.kubernetes.io/proxy-body-size: "0" nginx.ingress.kubernetes.io/proxy-read-timeout: "600" nginx.ingress.kubernetes.io/proxy-send-timeout: "600" name: trtlab-s3-ingress namespace: rook-ceph spec: rules: - host: s3.trt.lab http: paths: - backend: serviceName: rook-ceph-rgw-trtlab-s3 servicePort: 80 path: / ================================================ FILE: examples/Deployment/ObjectStore/rook-s3.yml ================================================ --- apiVersion: ceph.rook.io/v1 kind: CephObjectStore metadata: name: trtlab-s3 namespace: rook-ceph spec: metadataPool: failureDomain: host replicated: size: 3 dataPool: failureDomain: host erasureCoded: dataChunks: 2 codingChunks: 1 gateway: type: s3 sslCertificateRef: port: 80 securePort: instances: 1 allNodes: false --- apiVersion: ceph.rook.io/v1 kind: CephObjectStoreUser metadata: name: trtlab namespace: rook-ceph spec: store: trtlab-s3 displayName: "TensorRT Laboratory" ================================================ FILE: examples/Deployment/README.md ================================================ # Deploying Inference Services This document/example folder is a work in progress. Its intent is to cover various aspects of deployment, including strategies, limitations, services, and Kubernetes examples of deploying inference services.
Over the course of this guide, we will build a full end-to-end image processing service deployed on Kubernetes. Let us start by assuming all your models can be served/deployed with the [TensorRT Inference Server, aka TRTIS](https://github.com/nvidia/tensorrt-inference-server). One of the primary advantages of TRTIS is the ability to host multiple models in a single linux process. Given the capabilities of modern GPUs, this is the most efficient way for multiple models to share both compute and memory resources. Next, let's dive a little deeper into the features of TRTIS that give it this efficiency advantage. - _Concurrent Executions_ allow for multiple independent inference batches to be in-flight on the device at a given time. This could be multiple batches of the same model, single batches of different models, or any combination imaginable. - Running on a Tesla V100 GPU with ResNet-152 Batch8 - Allowing only 1 in-flight batch8 yields XXX images/sec with a compute latency of YYY. - Allowing 8 concurrent batch8 executions increases the throughput to 2500 images/sec; however, the compute latency per batch increases to ZZZ. - To evaluate the performance of TensorRT models as a function of concurrent in-flight executions we provide the [TensorRT/ConcurrentExecution](../TensorRT/ConcurrentExecution) example. ```bash infer.x --engine=/external/models/ResNet-152-b8-fp16.engine --concurrency=8 ``` - The value of concurrent executions will differ depending on the compute requirements of the model and the GPU on which it executes. The best practice is to benchmark and evaluate performance. - _Tunable Concurrency_ enables you to specify, on a per-model basis, the number of concurrent copies that can be executed at any given time. Follow the guidelines in Concurrent Executions to tune this option on a per-model basis. - _Dynamic Batching_ allows individual requests from either the same or different clients to be multiplexed into a single mini-batch and inferred. Dynamic Batching is performed on a per-model basis and can have a max `preferred_batch_size` and a `max_queue_delay_microseconds`, which specifies how long messages are allowed to accumulate. Batching is one of the best ways to improve throughput. Depending on your needs, you will want to balance the added latency required for batching vs the throughput improvements achieved; see the sketch after this section. - Note: in scale-out deployments where unary (send/recv, not streaming) requests are being load-balanced across multiple TRTIS instances, the value of dynamic batching in TRTIS decreases as the number of replicas increases. - TODO: We address this issue by creating Dynamic Batching Services that sit in front of the Load-Balancer. Add a discussion and update the [examples/03_Batching] example with the latest streaming Server/Client - _Custom Metrics_ provide application-level metrics on how the TRTIS service is performing. Analyzing these metrics can provide insight on when a service is being overloaded and when to add more resources. TRTIS and some TRTLAB examples expose Prometheus Metrics. - [examples/90_Kubernetes/prometheus] is one example of how to use Kubernetes + Prometheus + Grafana to scrape and visualize metrics from running TRTIS services. - TODO: Document and clean up examples. The goal of this project is to provide supplementary support to TRTIS by providing building blocks that help you build companion microservices that work with TRTIS, as well as example deployment scenarios.
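To make the batching tradeoff concrete, here is a small back-of-the-envelope sketch; every number in it is an illustrative assumption, not a TRTIS benchmark:

```python
# Rough model of the dynamic-batching tradeoff: batching amortizes per-launch
# cost at the price of queueing delay. All inputs are assumed values.
def batching_tradeoff(batch_size, compute_ms_per_batch, max_queue_delay_ms):
    # worst case: a request arrives just as the window opens and waits the
    # full queue delay before its batch even begins computing
    worst_case_latency_ms = max_queue_delay_ms + compute_ms_per_batch
    throughput_per_sec = batch_size * 1000.0 / compute_ms_per_batch
    return worst_case_latency_ms, throughput_per_sec

for batch, compute_ms in [(1, 2.6), (8, 8.0)]:  # assumed per-batch times
    latency, throughput = batching_tradeoff(batch, compute_ms, 5.0)
    print("batch={}: ~{:.0f} inf/sec, worst-case latency ~{:.1f} ms".format(
        batch, throughput, latency))
```

With these assumed numbers, batch 8 roughly 2.6x's the throughput while adding a few milliseconds of worst-case latency; the right balance depends entirely on your model and latency budget.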
## Configure a TRTIS Kubernetes Deployment In Kubernetes, a [Deployment](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/) allows you to define a [Pod](https://kubernetes.io/docs/concepts/workloads/pods/pod/) and the number of copies of that pod, i.e. a [ReplicaSet](https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/), that you would like deployed on your cluster. ### Build a Model Store The [TRTIS Model Store] is the collection of models that will be loaded and served by a TRTIS instance. For details on how to build a model store, see the [Deployment/ModelStore] example. In order to define our Kubernetes Deployment, we need to map our model store into our TRTIS Pod. There are multiple ways this can be achieved. - Extend the TRTIS container image and add the model store to the image - Mount an External Volume into the TRTIS Pod - Use another container in the same TRTIS Pod that will dynamically generate a model store from some external data sources, e.g. S3. - TODO: Add this example My Kubernetes cluster has an NFS mount at `/shared` on every node. For this example, our model store will be located at `/shared/trtis/example-model-store` ### Deploy TRTIS Pods ## Configure Multiple TRTIS Deployments with Different Sets of Models Now suppose you are serving more models than you can allocate on a single GPU. In this scenario, we can split our list of models into groups and spread those groups out over multiple TRTIS `Deployments` ## Scenario #1: 2 Models, 10 Servers, 20 GPUs Assume you have two computer vision models, e.g. classification, object detection, segmentation, that you wish to deploy on 10 servers. Probably the first question you might ask yourself is: what is the expected breakdown of load for each model? Is it 50/50? Or is it 90/10? Does it vary by time of day? Is it predictable? The strategy with TRTIS is simple: you deploy 20 replicas of the TRTIS service across 10 servers, 1 TRTIS service per GPU, and tell your load-balancer to round-robin requests across your services. However, unless you customize TRTIS, by default, TRTIS only accepts raw tensors as inputs and returns raw tensors as outputs. What are your inputs? And what's a reasonable expectation for the rest of the inference pipeline? Let's assume you are receiving JPEG images. First, you need to decode the images to raw pixels, then you need to prepare the images to be inferred. To keep things simple, let's assume that both models use the same input preprocessing method. What is the compression ratio of your JPEG images? Assuming the images are 8-bit, then [a blog post by Graphics Mill](https://www.graphicsmill.com/blog/2014/11/06/Compression-ratio-for-different-JPEG-quality-values#.XHtdPpNKiXE_) measured the compression ratio to be 1:5.27 for JPEG Quality 100 (Q=100) and 1:43.27 for Q=55. For this discussion we will focus on Q=80, which was measured to be a 1:25 ratio. This is the decompression ratio for JPEG bytes to INT8 pixel values. However, most DNN models, after normalization, accept fp32 tensors as inputs. This means we have roughly a 1:100 ratio of input JPEG bytes to bytes of input tensors. A 100KiB image becomes a 9.75MiB data structure that needs to be provided to TRTIS. This 100x increase in size is big, but not unreasonably large. If your images are coming in over the Internet/WAN, then your LAN connection is likely to be 100x faster. However, if this were video, that compression ratio per frame would be MUCH larger.
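The arithmetic behind that 100x figure, as a quick worked sketch (the 100KiB input is just an example size; the 1:25 ratio is the Q=80 figure cited above):

```python
# Worked form of the payload-size arithmetic above.
jpeg_kib = 100.0        # example input: a 100 KiB JPEG
decode_ratio = 25       # JPEG bytes -> INT8 pixels at Q=80 (cited figure)
fp32_expansion = 4      # each INT8 pixel becomes a 4-byte fp32 value
tensor_mib = jpeg_kib * decode_ratio * fp32_expansion / 1024
print("{:.0f} KiB JPEG -> {:.2f} MiB fp32 tensor".format(jpeg_kib, tensor_mib))
# prints: 100 KiB JPEG -> 9.77 MiB fp32 tensor, i.e. a ~100x expansion
```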
To future-proof our implementation, we are going to do our best to minimize the amount of data we move around. Next, what is it that our users are providing to us besides just the image to be inferred? The client must inform the server which model should be used for the request. And similarly, the server probably also wants to know some details about the user. In this example, we will use a user API key to authenticate the user. Now let's look at the data payload the client will be sending our server. We are going to break down an image inference request into two parts: - the bulk image data - the request metadata: api key, image_uuid, model_name This separation allows us to communicate and move each component more optimally. We will send the JPEG image bytes directly to an object store and we will use a gRPC message to communicate the metadata for the request. Why not embed the image directly into the gRPC message? While this is certainly a possibility, we are choosing this separation because in our optimal pipeline, our client request message will go through several services before the bulk image data is needed. These services include an ingress router/load-balancer and an external batching service before the requests are sent to the image pre-processing service. This separation helps us future-proof our implementation by avoiding unnecessary data movement. Let's break down our client implementation: - 1) writes the jpeg image data to an S3-compatible object store as some UUID.jpg - 2) creates an async gRPC unary (send/recv) RPC request to our inference service. - the message payload consists of our client api key, image uuid and model_name - add custom headers that will enable our ingress router/load-balancer to properly route the message to the correct target without the need to deserialize the request payload. This allows us to route messages directly to services keyed on that metadata. In this case, we will add the model name to the headers so we can eventually route requests to batching services unique to that model. This means our client can continue to issue async inference requests with the promise that the results will be returned at some future time. On the server side, this design breaks the inference request down into two components: the data and the request. Let's assume our incoming data transport is very efficient. The images will be deposited into an S3-compatible object store. On my Kubernetes cluster, I'll be using [Rook's Minio/S3 Operator](https://rook.io), but it would work equally well on AWS. ================================================ FILE: examples/Deployment/RouteRequests/CMakeLists.txt ================================================ add_executable(test_image_service.x test_service.cc ) target_link_libraries(test_image_service.x PUBLIC nvrpc deploy-image-client-protos gflags ) ================================================ FILE: examples/Deployment/RouteRequests/README.md ================================================ # Routing Requests If we have multiple instances of TRTIS each with different models, we need a way to route requests to the proper service. There are two convenient options: routing by subdomain or routing by headers.
In this example, we will have three unique pools of TRTIS services: - Pool A: only handles `model_a` `Classify` requests - Pool B: only handles `model_b` requests (`Classify` or `Detection`) - General Pool: handles all other requests In our hypothetical deployment scenario, both `model_a` and `model_b` are particularly active so we have dedicated resources to handle those requests. Similarly, our general pool has its own fixed size. Later, we will show how to auto-scale pods based on TRTIS and GPU metrics. A simple approach would be to host `model_a.trt.lab`, `model_b.trt.lab` and `general_pool.trt.lab` and have the client make the decision sender-side on where to send the requests. However, changes to the service layout would require updates to the client software, which makes this option less appealing. Ideally, we want our entire service to be hosted on a single endpoint `trt.lab`. To ensure our requests arrive at the proper destination server-side, we have our client add [Custom Metadata](https://github.com/grpc/grpc/blob/master/doc/PROTOCOL-HTTP2.md) to each gRPC request. ```c++ std::map<std::string, std::string> headers = {{"custom-metadata-model-name", model_name}}; ``` which inside our client library ```c++ for (auto& header : headers) { // add headers to ::grpc::ClientContext ctx->m_Context.AddMetadata(header.first, header.second); } ``` To test routing, we have provided an `envoy_config.yaml` configuration. The load-balancer/router listens on port 50050 and routes to three sample services running on 51051, 51052 and 51053. To differentiate by endpoint, `Classify` or `Detection`, we can match routes on their `uri`. Here are the relevant parts of the envoy config: ```yaml - match: prefix: /trtlab.deploy.image_client.Inference/Classify headers: - name: custom-metadata-model-name exact_match: model_a grpc: route: cluster: classify_model_a - match: prefix: / headers: - name: custom-metadata-model-name exact_match: model_b grpc: route: cluster: model_b - match: prefix: / grpc: route: cluster: general_pool ``` `test_routing.sh` provides a convenient means to test the configuration. It will compile a simple implementation of the ImageClient service, bring up an instance of Envoy, 3 instances of the test_service, then send both `Classify` and `Detection` requests with three different models to the router. The service implementation simply returns which named service handled the request. ```s root@5e8ffb38df87:/work/examples/Deployment/RouteRequests# ./test_routing.sh ... some start up output ... Testing Classify RPC I0307 13:41:36.614954 355 test_service.cc:74] model_a served by model_a I0307 13:41:36.616070 359 test_service.cc:74] model_b served by model_b I0307 13:41:36.617031 362 test_service.cc:74] model_c served by general_pool Testing Detection RPC I0307 13:41:36.617636 362 test_service.cc:74] model_a served by general_pool I0307 13:41:36.618005 359 test_service.cc:74] model_b served by model_b I0307 13:41:36.618367 362 test_service.cc:74] model_c served by general_pool **** Test Passed **** ``` While we are using Envoy (v1.9) directly in this example, we will later show how this can be accomplished in Istio. A major TODO in this project is to build a TRTIS operator which will provide a Kubernetes CRD able to dynamically manage the routes as a function of where the model will be loaded on the cluster. Whereas this example shows static placement, we eventually want to get to fully dynamic routes.
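For reference, the same routing header can be attached from a plain Python gRPC client as well; the sketch below assumes stubs generated from `api.proto` as `api_pb2`/`api_pb2_grpc` (those module names are an assumption, and `grpcio` plus the generated code must be on your `PYTHONPATH`):

```python
import grpc
import api_pb2        # assumed name for the python codegen of api.proto
import api_pb2_grpc   # assumed name for the grpc service stubs

# connect to the Envoy router, not directly to a backend
channel = grpc.insecure_channel("localhost:50050")
stub = api_pb2_grpc.InferenceStub(channel)

request = api_pb2.ImageInfo(model_name="model_b", image_uuid="some-uuid")
# Envoy matches on this metadata key to select the model_b cluster
metadata = (("custom-metadata-model-name", "model_b"),)
response = stub.Classify(request, metadata=metadata)
print(response.image_uuid)  # test_service.cc echoes back the serving pool
```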
================================================ FILE: examples/Deployment/RouteRequests/envoy_config.yaml ================================================ static_resources: listeners: - name: listener_0 address: socket_address: { address: 0.0.0.0, port_value: 50050 } filter_chains: - filters: - name: envoy.http_connection_manager config: codec_type: auto stat_prefix: ingress_http route_config: name: local_route virtual_hosts: - name: backend domains: - "*" routes: - match: prefix: /trtlab.deploy.image_client.Inference/Classify headers: # - name: content-type # value: application/grpc - name: custom-metadata-model-name exact_match: model_a grpc: route: cluster: classify_model_a - match: prefix: / headers: # - name: content-type # value: application/grpc - name: custom-metadata-model-name exact_match: model_b grpc: route: cluster: model_b - match: prefix: / # headers: # - name: content-type # value: application/grpc grpc: route: cluster: general_pool http_filters: - name: envoy.router config: {} clusters: - name: classify_model_a connect_timeout: 0.25s type: strict_dns lb_policy: round_robin http2_protocol_options: {} hosts: - socket_address: address: 127.0.0.1 port_value: 51051 - name: model_b connect_timeout: 0.25s type: strict_dns lb_policy: round_robin http2_protocol_options: {} hosts: - socket_address: address: 127.0.0.1 port_value: 51052 - name: general_pool connect_timeout: 0.25s type: strict_dns lb_policy: round_robin http2_protocol_options: {} hosts: - socket_address: address: 127.0.0.1 port_value: 51053 admin: access_log_path: "/dev/null" address: socket_address: address: 0.0.0.0 port_value: 8001 ================================================ FILE: examples/Deployment/RouteRequests/test_client.py ================================================ import os import deploy_image_client as cpp_client def main(): if not os.environ.get("TRTLAB_ROUTING_TEST"): raise RuntimeError( "Please run this script in the environment set up by test_routing.sh") router = cpp_client.ImageClient("localhost:50050") print("Testing Classify RPC") a = router.classify("model_a", "via_router_uuid1").get() b = router.classify("model_b", "via_router_uuid2").get() c = router.classify("model_c", "via_router_uuid3").get() assert a.uuid == "model_a" assert b.uuid == "model_b" assert c.uuid == "general_pool" print("Testing Detection RPC") a = router.detection("model_a", "via_router_uuid1").get() b = router.detection("model_b", "via_router_uuid2").get() c = router.detection("model_c", "via_router_uuid3").get() assert a.uuid == "general_pool" assert b.uuid == "model_b" assert c.uuid == "general_pool" print("\n**** Test Passed ****\n") if __name__ == "__main__": try: main() except RuntimeError as e: print("\n**** Error ****") print(e) print() ================================================ FILE: examples/Deployment/RouteRequests/test_routing.sh ================================================ #!/bin/bash cleanup() { kill $(jobs -p) ||: } trap "cleanup" EXIT SIGINT SIGTERM (cd /work/build/examples/Deployment/ImageClient; make) (cd /work/build/examples/Deployment/RouteRequests; make) export PYTHONPATH=$PYTHONPATH:/work/build/examples/Deployment/ImageClient exe=/work/build/examples/Deployment/RouteRequests/test_image_service.x $exe --hostname="model_a" --ip_port="0.0.0.0:51051" & #> /dev/null 2>&1 & $exe --hostname="model_b" --ip_port="0.0.0.0:51052" & #> /dev/null 2>&1 & $exe --hostname="general_pool" --ip_port="0.0.0.0:51053" & #> /dev/null 2>&1 & envoy -c envoy_config.yaml > /dev/null 2>&1 & wait-for-it.sh
localhost:50050 --timeout=0 -- echo "Envoy on 50050 ready" wait-for-it.sh localhost:51051 --timeout=0 -- echo "ModelA on 51051 ready" wait-for-it.sh localhost:51052 --timeout=0 -- echo "ModelB on 51052 ready" wait-for-it.sh localhost:51053 --timeout=0 -- echo "General Pool on 51053 ready" export TRTLAB_ROUTING_TEST=True python3 test_client.py ================================================ FILE: examples/Deployment/RouteRequests/test_service.cc ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include <gflags/gflags.h> #include <glog/logging.h> /* bracketed includes reconstructed; the originals were lost in extraction */ #include "tensorrt/laboratory/core/pool.h" #include "tensorrt/laboratory/core/resources.h" #include "tensorrt/laboratory/core/thread_pool.h" using trtlab::Resources; using trtlab::ThreadPool; #include "nvrpc/executor.h" #include "nvrpc/server.h" #include "nvrpc/service.h" using nvrpc::AsyncRPC; using nvrpc::AsyncService; using nvrpc::Context; using nvrpc::Executor; using nvrpc::Server; #include "api.grpc.pb.h" #include "api.pb.h" using trtlab::deploy::image_client::Classifications; using trtlab::deploy::image_client::Detections; using trtlab::deploy::image_client::ImageInfo; using trtlab::deploy::image_client::Inference; // CLI Options DEFINE_string(hostname, "localhost", "Hostname"); DEFINE_string(ip_port, "0.0.0.0:50051", "IP/Port on which to listen"); class TestResources : public Resources { public: TestResources(const std::string& hostname) : m_Hostname(hostname) {} const std::string& Hostname() const { return m_Hostname; } private: std::string m_Hostname; }; template<typename Output> class TestContext final : public Context<ImageInfo, Output, TestResources> { void ExecuteRPC(ImageInfo& input, Output& output) final override { LOG(INFO) << input.model_name() << " served by " << this->GetResources()->Hostname(); output.set_image_uuid(this->GetResources()->Hostname()); this->FinishResponse(); } }; int main(int argc, char* argv[]) { FLAGS_alsologtostderr = 1; // Log to console ::google::InitGoogleLogging("test_deploy_client"); ::google::ParseCommandLineFlags(&argc, &argv, true); Server server(FLAGS_ip_port); auto service = server.RegisterAsyncService<Inference>(); auto rpc_classify = service->RegisterRPC<TestContext<Classifications>>( &Inference::AsyncService::RequestClassify); auto rpc_detection = service->RegisterRPC<TestContext<Detections>>(&Inference::AsyncService::RequestDetection); auto resources = std::make_shared<TestResources>(FLAGS_hostname); auto executor = server.RegisterExecutor(new Executor(1)); executor->RegisterContexts(rpc_classify, resources, 1); executor->RegisterContexts(rpc_detection, resources, 1); server.Run(std::chrono::milliseconds(2000), [] {}); } ================================================ FILE: examples/Deployment/batcher.cc ================================================ /* unfinished sketch; template arguments lost in extraction have been reconstructed and inconsistent identifiers unified */ template<typename Request, typename Response> class BatchingService { public: using PrepareFn = std::function<std::unique_ptr<::grpc::ClientAsyncReaderWriter<Request, Response>>( ::grpc::ClientContext*, ::grpc::CompletionQueue*)>; using Callback = std::function<void(bool)>; struct MessageType { Request* request; Response* response; Callback callback; }; class Resources { public: Resources(PrepareFn prepare_fn, std::shared_ptr<client::Executor> executor, std::shared_ptr<::trtlab::ThreadPool> post_process, uint32_t max_batch_size, uint64_t timeout_in_us) : m_PrepareFn(prepare_fn), m_Executor(executor), m_WaitAndFinish(post_process), m_MaxBatchSize(max_batch_size), m_Timeout(timeout_in_us) { } std::shared_ptr<client::ClientStreaming<Request, Response>> CreateClient(std::function<void(Response&&)> on_recv) { auto on_sent = [](Request&& request) {}; return std::make_shared<client::ClientStreaming<Request, Response>>( m_PrepareFn, m_Executor, on_sent, on_recv); } void Enqueue(Request& req, Response& resp, Callback callback) { m_MessageQueue.enqueue(MessageType{&req, &resp, callback}); } protected: void BatchingEngine() { constexpr uint64_t quanta = 100; const double timeout = static_cast<double>(m_Timeout - quanta) / 1000000.0; const size_t max_batch_size = m_MaxBatchSize; size_t total_count; size_t max_deque; std::shared_ptr<std::vector<MessageType>> messages; std::chrono::time_point<std::chrono::high_resolution_clock> start; thread_local moodycamel::ConsumerToken token(m_MessageQueue); // clang-format off auto elapsed_time = [](std::chrono::time_point<std::chrono::high_resolution_clock>& start) -> double { return std::chrono::duration<double>( std::chrono::high_resolution_clock::now() - start).count(); }; // clang-format on for(;;) {
messages[m_MaxBatchsize]; max_batch = m_MaxBatchsize; total_count = 0; // pull 1 element from the queue and start timer // if dequeue times outs, then restart the loop total_count = m_MessageQueue.wait_dequeue_bulk_timed( token, &(*messages)[total_count], 1, quanta); max_deque = max_batch_size - total_count; if(count == 0) { continue; } // Create a Corked Stream - Corked = buffered writes stream = CreateClient([messages](Response&& response) mutable { CHECK(!messges.empty()); DLOG(INFO) << "Finishing Unary Response/Callback: " << message.size() << " remain on queue"; auto m = messages.front(); m.response = std::move(response); m.callback(); messages.erase(messages.begin()); }); stream->Corked(true); // Continue to collect inference requests until we reach a maximum batch size // or we hit the timeout. We will eagerly forward our current batch items along // the stream so the preprocessor can get ahead start start = std::chrono::high_resolution_clock::now(); while(total_count < max_batch_size && elapsed(start) <) { total_count += total_count = m_MessageQueue.wait_dequeue_bulk_timed( token, &(*messages)[total_count], max_deque, quanta); max_deque = max_batch_size - total_count; for(; isend < total_count; isend++) { auto& m = (*messages)[isend]; stream->Write(m.request)); } } // Batching complete if(total_count) { messages->resize(total_count); stream->Done(); m_WaitAndFinish.enqueue([stream]() mutable { auto future = stream->Status(); future.wait(); streawm.reset(); }); messages.reset(new std::vector); messages->resize(max_batch_size); } } } private: PrepareFn m_PrepareFn; std::shared_ptr m_Executor; std::shared_ptr<::trtlab::ThreadPool> m_WaitAndDelete; size_t m_MaxBatchsize; uint64_t m_Timeout; BlockingConcurrentQueue m_MessageQueue; }; class BatchingContext : public Context { void ExecuteRPC(Request& request, Response& response) final override { LOG(INFO) << "incoming unary request"; this->GetResources()->Enqueue(&request, &response, [this](bool ok) { if(ok) { this->FinishResponse(); } else { LOG(ERROR) << "Upstream Error"; this->CancelResponse(); } }); } }; }; ================================================ FILE: examples/ONNX/resnet50/README.md ================================================ # TensorRT ResNet50 Example - `fetch.sh` downloads the onnx model, test data, and calibration images from S3 - after running this script the `resnet50` and `calibration_images` directories should be present in your local path - Build (`build.py`) TensorRT engines from the `model.onnx` file - cli options: - `--batch` will select the batch size, multiple can be given, a separate engine for each batch size will be generated. - `--precision` can be `fp32`, `fp16`, or `int8`. if multiple precision are given, an engine for each will be created. - **Note**: To use `int8` precision, you will need a Turing, Volta, or Pascal GPU with compute capability 6.1. 
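The fragment above leaves the wiring implicit. The sketch below shows one way the pieces could fit together, reusing the nvRPC registration pattern from `test_service.cc`; the stub method `PrepareAsyncBatchedClassify`, the `client_executor` handle, and the pool/context counts are hypothetical, not part of the source.

```cpp
// Hypothetical wiring sketch for BatchingService (not part of the source tree).
using Batcher = BatchingService<ImageInfo, Classifications>;

// How the batcher opens a bidirectional stream to the upstream model service;
// 'stub' and 'PrepareAsyncBatchedClassify' are assumed names.
Batcher::PrepareFn prepare = [stub](::grpc::ClientContext* ctx,
                                    ::grpc::CompletionQueue* cq) {
    return stub->PrepareAsyncBatchedClassify(ctx, cq);
};

auto resources = std::make_shared<Batcher::Resources>(
    prepare,
    client_executor,                           // nvRPC client executor (assumed)
    std::make_shared<::trtlab::ThreadPool>(1), // wait-and-finish pool
    /*max_batch_size=*/8,
    /*timeout_in_us=*/2000);

// Expose the unary RPC whose requests are batched onto the corked stream,
// mirroring the registration calls in test_service.cc above.
auto rpc = service->RegisterRPC<Batcher::BatchingContext>(
    &Inference::AsyncService::RequestClassify);
executor->RegisterContexts(rpc, resources, 10);
```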
================================================
FILE: examples/ONNX/resnet50/README.md
================================================
# TensorRT ResNet50 Example

- `fetch.sh` downloads the ONNX model, test data, and calibration images from S3
  - after running this script the `resnet50` and `calibration_images` directories should be present in your local path
- Build (`build.py`) TensorRT engines from the `model.onnx` file
  - CLI options:
    - `--batch` selects the batch size; multiple values can be given, and a separate engine is generated for each batch size.
    - `--precision` can be `fp32`, `fp16`, or `int8`; if multiple precisions are given, an engine is created for each.
  - **Note**: To use `int8` precision, you will need a Turing or Volta GPU, or a Pascal GPU with compute capability 6.1.
- If you have a Turing or Volta GPU, run the following command, which generates 4 engines:
  ```
  ./build.py --batch=1 --batch=8 --precision=fp16 --precision=int8 resnet50/model.onnx
  ```
- If you have a Pascal GPU, run the following, which generates 2 engines:
  ```
  ./build.py --batch=1 --batch=8 --precision=fp32 resnet50/model.onnx
  ```
- Functional Test
  - `./run_onnx_tests.py model-b1-fp16.engine` will run the ONNX tests
- Benchmark TensorRT engines at different batch sizes and concurrent executions:
  - `/work/build/examples/00_TensorRT/infer.x --engine=model-b1-fp16.engine --contexts=1`
  - `/work/build/examples/00_TensorRT/infer.x --engine=model-b1-fp16.engine --contexts=8`
  - `/work/build/examples/00_TensorRT/infer.x --engine=model-b8-fp16.engine --contexts=1`
  - `/work/build/examples/00_TensorRT/infer.x --engine=model-b8-fp16.engine --contexts=6`
- `./run_jpeg_test.py --image=images/broccoli-3784.jpg model-b1-fp16.engine`
  - Note: this example requires MXNet for image preprocessing: `pip install mxnet`
  - On a V100 using FP16, your results should be close to
    ```
    *** Results ***
    broccoli 0.9511453
    ```
- `./run_jpeg_test.py --image=images/broccoli-3784.jpg model-b1-int8.engine`
  - When using INT8, your results should be close to
    ```
    *** Results ***
    broccoli 0.9228073
    ```

## Credits

- [broccoli image](https://www.openfotos.com/view/broccoli-3784)
  - OpenFotos - https://www.openfotos.com/pages/open-fotos-license
- [calibration images](calibration_images.csv)
  - Images from OpenFotos and Pixabay
  - https://www.openfotos.com/pages/open-fotos-license
  - https://pixabay.com/service/license/

## TODOs

- [ ] Update `run_jpeg_test.py` to highlight the async interface.

================================================
FILE: examples/ONNX/resnet50/build.py
================================================
#!/usr/bin/env python3 # # Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of NVIDIA CORPORATION nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
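# Engine naming convention (derived from the format strings below): building
# resnet50/model.onnx with --batch=1 --precision=fp16 produces
# "model-b1-fp16.engine", which is the name the README's test commands expect.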
# import os import subprocess import click import int8 precision_opts = { "fp32": "", "fp16": "--fp16", "int8": "--fp16 --int8", } File = click.Path(exists=True, file_okay=True, dir_okay=False, resolve_path=True) @click.command() @click.option("--batch", type=click.IntRange(min=1, max=32), multiple=True) @click.option("--precision", type=click.Choice(["fp32", "fp16", "int8"]), multiple=True) @click.argument("models", type=File, nargs=-1) def main(models, batch, precision): for model in models: #click.echo(model) #click.echo(precision) for p in precision: #click.echo(p) for b in batch: #click.echo(b) n = "b{}-{}".format(b, p) m = os.path.basename(model) m, ext = os.path.splitext(m) e = "{}-{}.{}".format(m,n,"engine") if os.path.isfile(e): print("A TensorRT engine {} already exists! Skipping...".format(e)) continue elif p == "int8": assert os.path.isdir("./calibration_images"), "Need to download calibration images before creating INT8 engine!" int8.build_int8_engine_onnx(model, "./calibration_images", b, 32, e) else: subprocess.call("trtexec --onnx={} --batch={} {} --saveEngine={}".format(model, b, precision_opts.get(p), e), shell=True) if __name__ == "__main__": main() ================================================ FILE: examples/ONNX/resnet50/calibration_images.csv ================================================ url,license,label https://www.openfotos.com/pictures/red-rock-crab-1096.full.jpg,Open Fotos License,crab https://www.openfotos.com/pictures/lazy-cats-4109.full.jpg,Open Fotos License,cat https://www.openfotos.com/pictures/beautiful-elephants-4005.full.jpg,Open Fotos License,elephants https://www.openfotos.com/pictures/funny-wild-pigs-437.full.jpg,Open Fotos License,pig https://www.openfotos.com/pictures/hopping-5017.full.jpg,Open Fotos License,bird https://www.openfotos.com/pictures/old-car-4811.full.jpg,Open Fotos License,car https://www.openfotos.com/pictures/audi-4740.full.jpg,Open Fotos License,car https://www.openfotos.com/pictures/boarding-in-the-plane-4891.full.jpg,Open Fotos License,plane https://cdn.pixabay.com/photo/2016/03/09/09/28/bear-1245807_960_720.jpg,Pixabay License,bear https://cdn.pixabay.com/photo/2015/02/26/06/09/panda-649938_960_720.jpg,Pixabay License,giant panda https://cdn.pixabay.com/photo/2015/09/22/19/00/ship-952292_960_720.jpg,Pixabay License,boat https://cdn.pixabay.com/photo/2017/01/16/19/17/horses-1984977_960_720.jpg,Pixabay License,horse https://cdn.pixabay.com/photo/2015/03/26/09/54/pug-690566_960_720.jpg,Pixabay License,dog https://cdn.pixabay.com/photo/2016/08/19/15/23/lizard-1605515_960_720.jpg,Pixabay License,lizzard https://cdn.pixabay.com/photo/2016/07/09/12/16/apple-1506119_960_720.jpg,Pixabay License,apple https://cdn.pixabay.com/photo/2016/10/07/14/11/tangerines-1721633_960_720.jpg,Pixabay License,orange https://cdn.pixabay.com/photo/2015/06/19/16/48/watermelon-815072_960_720.jpg,Pixabay License,watermelon https://cdn.pixabay.com/photo/2017/06/09/16/39/carrots-2387394_960_720.jpg,Pixabay License,carrot https://cdn.pixabay.com/photo/2019/02/28/22/45/hippo-4027011_960_720.jpg,Pixabay License,hippo https://cdn.pixabay.com/photo/2012/03/04/00/09/africa-21787_960_720.jpg,Pixabay License,lion https://cdn.pixabay.com/photo/2015/07/27/19/47/turtle-863336_960_720.jpg,Pixabay License,turtle https://cdn.pixabay.com/photo/2018/04/15/17/45/fish-3322230_960_720.jpg,Pixabay License,fish https://cdn.pixabay.com/photo/2013/11/01/11/13/dolphin-203875_960_720.jpg,Pixabay License,dolphin 
https://cdn.pixabay.com/photo/2014/05/02/21/49/home-office-336373_960_720.jpg,Pixabay License,laptop https://cdn.pixabay.com/photo/2012/05/18/21/45/tiger-mosquito-49141_960_720.jpg,Pixabay License,mosquito https://cdn.pixabay.com/photo/2016/08/09/13/21/coffee-1580595_960_720.jpg,Pixabay License,cup https://cdn.pixabay.com/photo/2016/02/19/10/36/lemons-1209309_960_720.jpg,Pixabay License,lemon https://cdn.pixabay.com/photo/2018/12/14/02/41/chengdu-3874136_960_720.jpg,Pixabay License,hotpot https://cdn.pixabay.com/photo/2015/09/05/12/53/violin-924349_960_720.jpg,Pixabay License,violin https://cdn.pixabay.com/photo/2015/11/07/11/22/pillows-1031079_960_720.jpg,Pixabay License,pillow https://cdn.pixabay.com/photo/2016/11/18/22/26/animal-1837164_960_720.jpg,Pixabay License,rhino https://cdn.pixabay.com/photo/2016/12/04/21/58/rabbit-1882699_960_720.jpg,Pixabay License,rabbit https://cdn.pixabay.com/photo/2014/10/23/18/56/tiger-500118_960_720.jpg,Pixabay License,tiger ================================================ FILE: examples/ONNX/resnet50/calibrator.py ================================================ # # Copyright 1993-2019 NVIDIA Corporation. All rights reserved. # # NOTICE TO LICENSEE: # # This source code and/or documentation ("Licensed Deliverables") are # subject to NVIDIA intellectual property rights under U.S. and # international Copyright laws. # # These Licensed Deliverables contained herein is PROPRIETARY and # CONFIDENTIAL to NVIDIA and is being provided under the terms and # conditions of a form of NVIDIA software license agreement by and # between NVIDIA and Licensee ("License Agreement") or electronically # accepted by Licensee. Notwithstanding any terms or conditions to # the contrary in the License Agreement, reproduction or disclosure # of the Licensed Deliverables to any third party without the express # written consent of NVIDIA is prohibited. # # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE # LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE # SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS # PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. # NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED # DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, # NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE # LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY # SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE # OF THESE LICENSED DELIVERABLES. # # U.S. Government End Users. These Licensed Deliverables are a # "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT # 1995), consisting of "commercial computer software" and "commercial # computer software documentation" as such terms are used in 48 # C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government # only as a commercial end item. Consistent with 48 C.F.R.12.212 and # 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all # U.S. Government End Users acquire the Licensed Deliverables with # only those rights set forth herein. # # Any use of the Licensed Deliverables in individual and commercial # software must include, in the user documentation and internal # comments to the code, the above Disclaimer and U.S. 
Government End # Users Notice. import tensorrt as trt import os import pycuda.driver as cuda import pycuda.autoinit import matplotlib.pyplot as plt import mxnet as mx from mxnet.gluon.data.vision import transforms import numpy as np from random import shuffle class ONNXEntropyCalibrator(trt.IInt8EntropyCalibrator): def __init__(self, image_dir, batch_size, calibration_batches, cache_file): # Whenever you specify a custom constructor for a TensorRT class, # you MUST call the constructor of the parent explicitly. trt.IInt8EntropyCalibrator.__init__(self) self.cache_file = cache_file # Get a list of all the images in the image directory. image_files = [os.path.join(image_dir, f) for f in os.listdir(image_dir)] shuffle(image_files) if len(image_files) < calibration_batches * batch_size: print("Only found enough images for {} batches instead of {}, continuing anyway...".format(len(image_files) // batch_size, calibration_batches)) self.image_files = image_files else: self.image_files = image_files[:calibration_batches * batch_size] # Keeps track of current image in image list self.current_image = 0 self.batch_size = batch_size self.input_size = [3,224,224] # Each element of the calibration data is a float32. self.device_input = cuda.mem_alloc(self.batch_size * self.input_size[0] * self.input_size[1] * self.input_size[2] * trt.float32.itemsize) # Create a generator that will give us batches. We can use next() to iterate over the result. def load_batches(): while self.current_image < len(self.image_files): data, images_read = self.read_image_batch() self.current_image += images_read yield data self.batches = load_batches() def transform_image(self, img): transform_fn = transforms.Compose([ transforms.Resize(224), transforms.CenterCrop(224), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]) img = transform_fn(mx.nd.array(img)).asnumpy() return img # This function is used to load calibration images into batches. def read_image_batch(self): # Depending on batch size and number of images, the final batch might only be partially full. images_to_read = min(self.batch_size, len(self.image_files) - self.current_image) host_buffer = np.zeros(shape=[self.batch_size]+self.input_size) for i in range(images_to_read): img = np.array(plt.imread(self.image_files[self.current_image])) img = self.transform_image(img) host_buffer[i,:,:,:] = img return host_buffer, images_to_read def get_batch_size(self): return self.batch_size # TensorRT passes along the names of the engine bindings to the get_batch function. # You don't necessarily have to use them, but they can be useful to understand the order of # the inputs. The bindings list is expected to have the same ordering as 'names'. def get_batch(self, names): try: # Get a single batch. data = np.ascontiguousarray(next(self.batches), np.float32) # Copy to device, then return a list containing pointers to input device buffers. cuda.memcpy_htod(self.device_input, data) return [int(self.device_input)] except StopIteration: # When we're out of batches, we return either [] or None. # This signals to TensorRT that there is no calibration data remaining. return None def read_calibration_cache(self): # If there is a cache, use it instead of calibrating again. Otherwise, implicitly return None. 
if os.path.exists(self.cache_file): with open(self.cache_file, "rb") as f: return f.read() def write_calibration_cache(self, cache): with open(self.cache_file, "wb") as f: f.write(cache) ================================================ FILE: examples/ONNX/resnet50/fetch.sh ================================================ #!/bin/bash if [ ! -e "resnet50.tar.gz" ]; then wget https://s3.amazonaws.com/download.onnx/models/opset_8/resnet50.tar.gz fi if [ ! -e "open_source_images.tar.gz" ]; then wget https://s3-us-west-2.amazonaws.com/com.nvidia.tensorrt-laboratory/open_source_images.tar.gz fi if md5sum -c resnet50.md5; then if [ ! -e "resnet50" ]; then tar xzf resnet50.tar.gz fi echo "ResNet50 download good" else echo "ResNet50 md5 checksum failed" exit 911 fi if md5sum -c open_source_images.md5; then if [ ! -e "calibration_images" ]; then tar xf open_source_images.tar.gz fi echo "All good - Continue to Build Phase" else echo "calibration_images md5 checksum failed" exit 911 fi ================================================ FILE: examples/ONNX/resnet50/imagenet_labels.py ================================================ labels = {0: 'tench, Tinca tinca', 1: 'goldfish, Carassius auratus', 2: 'great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias', 3: 'tiger shark, Galeocerdo cuvieri', 4: 'hammerhead, hammerhead shark', 5: 'electric ray, crampfish, numbfish, torpedo', 6: 'stingray', 7: 'cock', 8: 'hen', 9: 'ostrich, Struthio camelus', 10: 'brambling, Fringilla montifringilla', 11: 'goldfinch, Carduelis carduelis', 12: 'house finch, linnet, Carpodacus mexicanus', 13: 'junco, snowbird', 14: 'indigo bunting, indigo finch, indigo bird, Passerina cyanea', 15: 'robin, American robin, Turdus migratorius', 16: 'bulbul', 17: 'jay', 18: 'magpie', 19: 'chickadee', 20: 'water ouzel, dipper', 21: 'kite', 22: 'bald eagle, American eagle, Haliaeetus leucocephalus', 23: 'vulture', 24: 'great grey owl, great gray owl, Strix nebulosa', 25: 'European fire salamander, Salamandra salamandra', 26: 'common newt, Triturus vulgaris', 27: 'eft', 28: 'spotted salamander, Ambystoma maculatum', 29: 'axolotl, mud puppy, Ambystoma mexicanum', 30: 'bullfrog, Rana catesbeiana', 31: 'tree frog, tree-frog', 32: 'tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui', 33: 'loggerhead, loggerhead turtle, Caretta caretta', 34: 'leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea', 35: 'mud turtle', 36: 'terrapin', 37: 'box turtle, box tortoise', 38: 'banded gecko', 39: 'common iguana, iguana, Iguana iguana', 40: 'American chameleon, anole, Anolis carolinensis', 41: 'whiptail, whiptail lizard', 42: 'agama', 43: 'frilled lizard, Chlamydosaurus kingi', 44: 'alligator lizard', 45: 'Gila monster, Heloderma suspectum', 46: 'green lizard, Lacerta viridis', 47: 'African chameleon, Chamaeleo chamaeleon', 48: 'Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis', 49: 'African crocodile, Nile crocodile, Crocodylus niloticus', 50: 'American alligator, Alligator mississipiensis', 51: 'triceratops', 52: 'thunder snake, worm snake, Carphophis amoenus', 53: 'ringneck snake, ring-necked snake, ring snake', 54: 'hognose snake, puff adder, sand viper', 55: 'green snake, grass snake', 56: 'king snake, kingsnake', 57: 'garter snake, grass snake', 58: 'water snake', 59: 'vine snake', 60: 'night snake, Hypsiglena torquata', 61: 'boa constrictor, Constrictor constrictor', 62: 'rock python, rock snake, Python sebae', 63: 'Indian cobra, Naja naja', 64: 'green mamba', 65: 
'sea snake', 66: 'horned viper, cerastes, sand viper, horned asp, Cerastes cornutus', 67: 'diamondback, diamondback rattlesnake, Crotalus adamanteus', 68: 'sidewinder, horned rattlesnake, Crotalus cerastes', 69: 'trilobite', 70: 'harvestman, daddy longlegs, Phalangium opilio', 71: 'scorpion', 72: 'black and gold garden spider, Argiope aurantia', 73: 'barn spider, Araneus cavaticus', 74: 'garden spider, Aranea diademata', 75: 'black widow, Latrodectus mactans', 76: 'tarantula', 77: 'wolf spider, hunting spider', 78: 'tick', 79: 'centipede', 80: 'black grouse', 81: 'ptarmigan', 82: 'ruffed grouse, partridge, Bonasa umbellus', 83: 'prairie chicken, prairie grouse, prairie fowl', 84: 'peacock', 85: 'quail', 86: 'partridge', 87: 'African grey, African gray, Psittacus erithacus', 88: 'macaw', 89: 'sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita', 90: 'lorikeet', 91: 'coucal', 92: 'bee eater', 93: 'hornbill', 94: 'hummingbird', 95: 'jacamar', 96: 'toucan', 97: 'drake', 98: 'red-breasted merganser, Mergus serrator', 99: 'goose', 100: 'black swan, Cygnus atratus', 101: 'tusker', 102: 'echidna, spiny anteater, anteater', 103: 'platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus', 104: 'wallaby, brush kangaroo', 105: 'koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus', 106: 'wombat', 107: 'jellyfish', 108: 'sea anemone, anemone', 109: 'brain coral', 110: 'flatworm, platyhelminth', 111: 'nematode, nematode worm, roundworm', 112: 'conch', 113: 'snail', 114: 'slug', 115: 'sea slug, nudibranch', 116: 'chiton, coat-of-mail shell, sea cradle, polyplacophore', 117: 'chambered nautilus, pearly nautilus, nautilus', 118: 'Dungeness crab, Cancer magister', 119: 'rock crab, Cancer irroratus', 120: 'fiddler crab', 121: 'king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica', 122: 'American lobster, Northern lobster, Maine lobster, Homarus americanus', 123: 'spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish', 124: 'crayfish, crawfish, crawdad, crawdaddy', 125: 'hermit crab', 126: 'isopod', 127: 'white stork, Ciconia ciconia', 128: 'black stork, Ciconia nigra', 129: 'spoonbill', 130: 'flamingo', 131: 'little blue heron, Egretta caerulea', 132: 'American egret, great white heron, Egretta albus', 133: 'bittern', 134: 'crane', 135: 'limpkin, Aramus pictus', 136: 'European gallinule, Porphyrio porphyrio', 137: 'American coot, marsh hen, mud hen, water hen, Fulica americana', 138: 'bustard', 139: 'ruddy turnstone, Arenaria interpres', 140: 'red-backed sandpiper, dunlin, Erolia alpina', 141: 'redshank, Tringa totanus', 142: 'dowitcher', 143: 'oystercatcher, oyster catcher', 144: 'pelican', 145: 'king penguin, Aptenodytes patagonica', 146: 'albatross, mollymawk', 147: 'grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus', 148: 'killer whale, killer, orca, grampus, sea wolf, Orcinus orca', 149: 'dugong, Dugong dugon', 150: 'sea lion', 151: 'Chihuahua', 152: 'Japanese spaniel', 153: 'Maltese dog, Maltese terrier, Maltese', 154: 'Pekinese, Pekingese, Peke', 155: 'Shih-Tzu', 156: 'Blenheim spaniel', 157: 'papillon', 158: 'toy terrier', 159: 'Rhodesian ridgeback', 160: 'Afghan hound, Afghan', 161: 'basset, basset hound', 162: 'beagle', 163: 'bloodhound, sleuthhound', 164: 'bluetick', 165: 'black-and-tan coonhound', 166: 'Walker hound, Walker foxhound', 167: 'English foxhound', 168: 'redbone', 169: 'borzoi, Russian wolfhound', 170: 'Irish wolfhound', 171: 'Italian greyhound', 
172: 'whippet', 173: 'Ibizan hound, Ibizan Podenco', 174: 'Norwegian elkhound, elkhound', 175: 'otterhound, otter hound', 176: 'Saluki, gazelle hound', 177: 'Scottish deerhound, deerhound', 178: 'Weimaraner', 179: 'Staffordshire bullterrier, Staffordshire bull terrier', 180: 'American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier', 181: 'Bedlington terrier', 182: 'Border terrier', 183: 'Kerry blue terrier', 184: 'Irish terrier', 185: 'Norfolk terrier', 186: 'Norwich terrier', 187: 'Yorkshire terrier', 188: 'wire-haired fox terrier', 189: 'Lakeland terrier', 190: 'Sealyham terrier, Sealyham', 191: 'Airedale, Airedale terrier', 192: 'cairn, cairn terrier', 193: 'Australian terrier', 194: 'Dandie Dinmont, Dandie Dinmont terrier', 195: 'Boston bull, Boston terrier', 196: 'miniature schnauzer', 197: 'giant schnauzer', 198: 'standard schnauzer', 199: 'Scotch terrier, Scottish terrier, Scottie', 200: 'Tibetan terrier, chrysanthemum dog', 201: 'silky terrier, Sydney silky', 202: 'soft-coated wheaten terrier', 203: 'West Highland white terrier', 204: 'Lhasa, Lhasa apso', 205: 'flat-coated retriever', 206: 'curly-coated retriever', 207: 'golden retriever', 208: 'Labrador retriever', 209: 'Chesapeake Bay retriever', 210: 'German short-haired pointer', 211: 'vizsla, Hungarian pointer', 212: 'English setter', 213: 'Irish setter, red setter', 214: 'Gordon setter', 215: 'Brittany spaniel', 216: 'clumber, clumber spaniel', 217: 'English springer, English springer spaniel', 218: 'Welsh springer spaniel', 219: 'cocker spaniel, English cocker spaniel, cocker', 220: 'Sussex spaniel', 221: 'Irish water spaniel', 222: 'kuvasz', 223: 'schipperke', 224: 'groenendael', 225: 'malinois', 226: 'briard', 227: 'kelpie', 228: 'komondor', 229: 'Old English sheepdog, bobtail', 230: 'Shetland sheepdog, Shetland sheep dog, Shetland', 231: 'collie', 232: 'Border collie', 233: 'Bouvier des Flandres, Bouviers des Flandres', 234: 'Rottweiler', 235: 'German shepherd, German shepherd dog, German police dog, alsatian', 236: 'Doberman, Doberman pinscher', 237: 'miniature pinscher', 238: 'Greater Swiss Mountain dog', 239: 'Bernese mountain dog', 240: 'Appenzeller', 241: 'EntleBucher', 242: 'boxer', 243: 'bull mastiff', 244: 'Tibetan mastiff', 245: 'French bulldog', 246: 'Great Dane', 247: 'Saint Bernard, St Bernard', 248: 'Eskimo dog, husky', 249: 'malamute, malemute, Alaskan malamute', 250: 'Siberian husky', 251: 'dalmatian, coach dog, carriage dog', 252: 'affenpinscher, monkey pinscher, monkey dog', 253: 'basenji', 254: 'pug, pug-dog', 255: 'Leonberg', 256: 'Newfoundland, Newfoundland dog', 257: 'Great Pyrenees', 258: 'Samoyed, Samoyede', 259: 'Pomeranian', 260: 'chow, chow chow', 261: 'keeshond', 262: 'Brabancon griffon', 263: 'Pembroke, Pembroke Welsh corgi', 264: 'Cardigan, Cardigan Welsh corgi', 265: 'toy poodle', 266: 'miniature poodle', 267: 'standard poodle', 268: 'Mexican hairless', 269: 'timber wolf, grey wolf, gray wolf, Canis lupus', 270: 'white wolf, Arctic wolf, Canis lupus tundrarum', 271: 'red wolf, maned wolf, Canis rufus, Canis niger', 272: 'coyote, prairie wolf, brush wolf, Canis latrans', 273: 'dingo, warrigal, warragal, Canis dingo', 274: 'dhole, Cuon alpinus', 275: 'African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus', 276: 'hyena, hyaena', 277: 'red fox, Vulpes vulpes', 278: 'kit fox, Vulpes macrotis', 279: 'Arctic fox, white fox, Alopex lagopus', 280: 'grey fox, gray fox, Urocyon cinereoargenteus', 281: 'tabby, tabby cat', 282: 'tiger cat', 283: 
'Persian cat', 284: 'Siamese cat, Siamese', 285: 'Egyptian cat', 286: 'cougar, puma, catamount, mountain lion, painter, panther, Felis concolor', 287: 'lynx, catamount', 288: 'leopard, Panthera pardus', 289: 'snow leopard, ounce, Panthera uncia', 290: 'jaguar, panther, Panthera onca, Felis onca', 291: 'lion, king of beasts, Panthera leo', 292: 'tiger, Panthera tigris', 293: 'cheetah, chetah, Acinonyx jubatus', 294: 'brown bear, bruin, Ursus arctos', 295: 'American black bear, black bear, Ursus americanus, Euarctos americanus', 296: 'ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus', 297: 'sloth bear, Melursus ursinus, Ursus ursinus', 298: 'mongoose', 299: 'meerkat, mierkat', 300: 'tiger beetle', 301: 'ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle', 302: 'ground beetle, carabid beetle', 303: 'long-horned beetle, longicorn, longicorn beetle', 304: 'leaf beetle, chrysomelid', 305: 'dung beetle', 306: 'rhinoceros beetle', 307: 'weevil', 308: 'fly', 309: 'bee', 310: 'ant, emmet, pismire', 311: 'grasshopper, hopper', 312: 'cricket', 313: 'walking stick, walkingstick, stick insect', 314: 'cockroach, roach', 315: 'mantis, mantid', 316: 'cicada, cicala', 317: 'leafhopper', 318: 'lacewing, lacewing fly', 319: "dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk", 320: 'damselfly', 321: 'admiral', 322: 'ringlet, ringlet butterfly', 323: 'monarch, monarch butterfly, milkweed butterfly, Danaus plexippus', 324: 'cabbage butterfly', 325: 'sulphur butterfly, sulfur butterfly', 326: 'lycaenid, lycaenid butterfly', 327: 'starfish, sea star', 328: 'sea urchin', 329: 'sea cucumber, holothurian', 330: 'wood rabbit, cottontail, cottontail rabbit', 331: 'hare', 332: 'Angora, Angora rabbit', 333: 'hamster', 334: 'porcupine, hedgehog', 335: 'fox squirrel, eastern fox squirrel, Sciurus niger', 336: 'marmot', 337: 'beaver', 338: 'guinea pig, Cavia cobaya', 339: 'sorrel', 340: 'zebra', 341: 'hog, pig, grunter, squealer, Sus scrofa', 342: 'wild boar, boar, Sus scrofa', 343: 'warthog', 344: 'hippopotamus, hippo, river horse, Hippopotamus amphibius', 345: 'ox', 346: 'water buffalo, water ox, Asiatic buffalo, Bubalus bubalis', 347: 'bison', 348: 'ram, tup', 349: 'bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis', 350: 'ibex, Capra ibex', 351: 'hartebeest', 352: 'impala, Aepyceros melampus', 353: 'gazelle', 354: 'Arabian camel, dromedary, Camelus dromedarius', 355: 'llama', 356: 'weasel', 357: 'mink', 358: 'polecat, fitch, foulmart, foumart, Mustela putorius', 359: 'black-footed ferret, ferret, Mustela nigripes', 360: 'otter', 361: 'skunk, polecat, wood pussy', 362: 'badger', 363: 'armadillo', 364: 'three-toed sloth, ai, Bradypus tridactylus', 365: 'orangutan, orang, orangutang, Pongo pygmaeus', 366: 'gorilla, Gorilla gorilla', 367: 'chimpanzee, chimp, Pan troglodytes', 368: 'gibbon, Hylobates lar', 369: 'siamang, Hylobates syndactylus, Symphalangus syndactylus', 370: 'guenon, guenon monkey', 371: 'patas, hussar monkey, Erythrocebus patas', 372: 'baboon', 373: 'macaque', 374: 'langur', 375: 'colobus, colobus monkey', 376: 'proboscis monkey, Nasalis larvatus', 377: 'marmoset', 378: 'capuchin, ringtail, Cebus capucinus', 379: 'howler monkey, howler', 380: 'titi, titi monkey', 381: 'spider monkey, Ateles geoffroyi', 382: 'squirrel monkey, Saimiri sciureus', 383: 'Madagascar cat, ring-tailed lemur, Lemur catta', 384: 'indri, indris, Indri indri, Indri brevicaudatus', 385: 'Indian 
elephant, Elephas maximus', 386: 'African elephant, Loxodonta africana', 387: 'lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens', 388: 'giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca', 389: 'barracouta, snoek', 390: 'eel', 391: 'coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch', 392: 'rock beauty, Holocanthus tricolor', 393: 'anemone fish', 394: 'sturgeon', 395: 'gar, garfish, garpike, billfish, Lepisosteus osseus', 396: 'lionfish', 397: 'puffer, pufferfish, blowfish, globefish', 398: 'abacus', 399: 'abaya', 400: "academic gown, academic robe, judge's robe", 401: 'accordion, piano accordion, squeeze box', 402: 'acoustic guitar', 403: 'aircraft carrier, carrier, flattop, attack aircraft carrier', 404: 'airliner', 405: 'airship, dirigible', 406: 'altar', 407: 'ambulance', 408: 'amphibian, amphibious vehicle', 409: 'analog clock', 410: 'apiary, bee house', 411: 'apron', 412: 'ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin', 413: 'assault rifle, assault gun', 414: 'backpack, back pack, knapsack, packsack, rucksack, haversack', 415: 'bakery, bakeshop, bakehouse', 416: 'balance beam, beam', 417: 'balloon', 418: 'ballpoint, ballpoint pen, ballpen, Biro', 419: 'Band Aid', 420: 'banjo', 421: 'bannister, banister, balustrade, balusters, handrail', 422: 'barbell', 423: 'barber chair', 424: 'barbershop', 425: 'barn', 426: 'barometer', 427: 'barrel, cask', 428: 'barrow, garden cart, lawn cart, wheelbarrow', 429: 'baseball', 430: 'basketball', 431: 'bassinet', 432: 'bassoon', 433: 'bathing cap, swimming cap', 434: 'bath towel', 435: 'bathtub, bathing tub, bath, tub', 436: 'beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon', 437: 'beacon, lighthouse, beacon light, pharos', 438: 'beaker', 439: 'bearskin, busby, shako', 440: 'beer bottle', 441: 'beer glass', 442: 'bell cote, bell cot', 443: 'bib', 444: 'bicycle-built-for-two, tandem bicycle, tandem', 445: 'bikini, two-piece', 446: 'binder, ring-binder', 447: 'binoculars, field glasses, opera glasses', 448: 'birdhouse', 449: 'boathouse', 450: 'bobsled, bobsleigh, bob', 451: 'bolo tie, bolo, bola tie, bola', 452: 'bonnet, poke bonnet', 453: 'bookcase', 454: 'bookshop, bookstore, bookstall', 455: 'bottlecap', 456: 'bow', 457: 'bow tie, bow-tie, bowtie', 458: 'brass, memorial tablet, plaque', 459: 'brassiere, bra, bandeau', 460: 'breakwater, groin, groyne, mole, bulwark, seawall, jetty', 461: 'breastplate, aegis, egis', 462: 'broom', 463: 'bucket, pail', 464: 'buckle', 465: 'bulletproof vest', 466: 'bullet train, bullet', 467: 'butcher shop, meat market', 468: 'cab, hack, taxi, taxicab', 469: 'caldron, cauldron', 470: 'candle, taper, wax light', 471: 'cannon', 472: 'canoe', 473: 'can opener, tin opener', 474: 'cardigan', 475: 'car mirror', 476: 'carousel, carrousel, merry-go-round, roundabout, whirligig', 477: "carpenter's kit, tool kit", 478: 'carton', 479: 'car wheel', 480: 'cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM', 481: 'cassette', 482: 'cassette player', 483: 'castle', 484: 'catamaran', 485: 'CD player', 486: 'cello, violoncello', 487: 'cellular telephone, cellular phone, cellphone, cell, mobile phone', 488: 'chain', 489: 'chainlink fence', 490: 'chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour', 491: 'chain saw, chainsaw', 492: 'chest', 493: 'chiffonier, commode', 494: 'chime, bell, gong', 495: 
'china cabinet, china closet', 496: 'Christmas stocking', 497: 'church, church building', 498: 'cinema, movie theater, movie theatre, movie house, picture palace', 499: 'cleaver, meat cleaver, chopper', 500: 'cliff dwelling', 501: 'cloak', 502: 'clog, geta, patten, sabot', 503: 'cocktail shaker', 504: 'coffee mug', 505: 'coffeepot', 506: 'coil, spiral, volute, whorl, helix', 507: 'combination lock', 508: 'computer keyboard, keypad', 509: 'confectionery, confectionary, candy store', 510: 'container ship, containership, container vessel', 511: 'convertible', 512: 'corkscrew, bottle screw', 513: 'cornet, horn, trumpet, trump', 514: 'cowboy boot', 515: 'cowboy hat, ten-gallon hat', 516: 'cradle', 517: 'crane', 518: 'crash helmet', 519: 'crate', 520: 'crib, cot', 521: 'Crock Pot', 522: 'croquet ball', 523: 'crutch', 524: 'cuirass', 525: 'dam, dike, dyke', 526: 'desk', 527: 'desktop computer', 528: 'dial telephone, dial phone', 529: 'diaper, nappy, napkin', 530: 'digital clock', 531: 'digital watch', 532: 'dining table, board', 533: 'dishrag, dishcloth', 534: 'dishwasher, dish washer, dishwashing machine', 535: 'disk brake, disc brake', 536: 'dock, dockage, docking facility', 537: 'dogsled, dog sled, dog sleigh', 538: 'dome', 539: 'doormat, welcome mat', 540: 'drilling platform, offshore rig', 541: 'drum, membranophone, tympan', 542: 'drumstick', 543: 'dumbbell', 544: 'Dutch oven', 545: 'electric fan, blower', 546: 'electric guitar', 547: 'electric locomotive', 548: 'entertainment center', 549: 'envelope', 550: 'espresso maker', 551: 'face powder', 552: 'feather boa, boa', 553: 'file, file cabinet, filing cabinet', 554: 'fireboat', 555: 'fire engine, fire truck', 556: 'fire screen, fireguard', 557: 'flagpole, flagstaff', 558: 'flute, transverse flute', 559: 'folding chair', 560: 'football helmet', 561: 'forklift', 562: 'fountain', 563: 'fountain pen', 564: 'four-poster', 565: 'freight car', 566: 'French horn, horn', 567: 'frying pan, frypan, skillet', 568: 'fur coat', 569: 'garbage truck, dustcart', 570: 'gasmask, respirator, gas helmet', 571: 'gas pump, gasoline pump, petrol pump, island dispenser', 572: 'goblet', 573: 'go-kart', 574: 'golf ball', 575: 'golfcart, golf cart', 576: 'gondola', 577: 'gong, tam-tam', 578: 'gown', 579: 'grand piano, grand', 580: 'greenhouse, nursery, glasshouse', 581: 'grille, radiator grille', 582: 'grocery store, grocery, food market, market', 583: 'guillotine', 584: 'hair slide', 585: 'hair spray', 586: 'half track', 587: 'hammer', 588: 'hamper', 589: 'hand blower, blow dryer, blow drier, hair dryer, hair drier', 590: 'hand-held computer, hand-held microcomputer', 591: 'handkerchief, hankie, hanky, hankey', 592: 'hard disc, hard disk, fixed disk', 593: 'harmonica, mouth organ, harp, mouth harp', 594: 'harp', 595: 'harvester, reaper', 596: 'hatchet', 597: 'holster', 598: 'home theater, home theatre', 599: 'honeycomb', 600: 'hook, claw', 601: 'hoopskirt, crinoline', 602: 'horizontal bar, high bar', 603: 'horse cart, horse-cart', 604: 'hourglass', 605: 'iPod', 606: 'iron, smoothing iron', 607: "jack-o'-lantern", 608: 'jean, blue jean, denim', 609: 'jeep, landrover', 610: 'jersey, T-shirt, tee shirt', 611: 'jigsaw puzzle', 612: 'jinrikisha, ricksha, rickshaw', 613: 'joystick', 614: 'kimono', 615: 'knee pad', 616: 'knot', 617: 'lab coat, laboratory coat', 618: 'ladle', 619: 'lampshade, lamp shade', 620: 'laptop, laptop computer', 621: 'lawn mower, mower', 622: 'lens cap, lens cover', 623: 'letter opener, paper knife, paperknife', 624: 'library', 625: 'lifeboat', 626: 
'lighter, light, igniter, ignitor', 627: 'limousine, limo', 628: 'liner, ocean liner', 629: 'lipstick, lip rouge', 630: 'Loafer', 631: 'lotion', 632: 'loudspeaker, speaker, speaker unit, loudspeaker system, speaker system', 633: "loupe, jeweler's loupe", 634: 'lumbermill, sawmill', 635: 'magnetic compass', 636: 'mailbag, postbag', 637: 'mailbox, letter box', 638: 'maillot', 639: 'maillot, tank suit', 640: 'manhole cover', 641: 'maraca', 642: 'marimba, xylophone', 643: 'mask', 644: 'matchstick', 645: 'maypole', 646: 'maze, labyrinth', 647: 'measuring cup', 648: 'medicine chest, medicine cabinet', 649: 'megalith, megalithic structure', 650: 'microphone, mike', 651: 'microwave, microwave oven', 652: 'military uniform', 653: 'milk can', 654: 'minibus', 655: 'miniskirt, mini', 656: 'minivan', 657: 'missile', 658: 'mitten', 659: 'mixing bowl', 660: 'mobile home, manufactured home', 661: 'Model T', 662: 'modem', 663: 'monastery', 664: 'monitor', 665: 'moped', 666: 'mortar', 667: 'mortarboard', 668: 'mosque', 669: 'mosquito net', 670: 'motor scooter, scooter', 671: 'mountain bike, all-terrain bike, off-roader', 672: 'mountain tent', 673: 'mouse, computer mouse', 674: 'mousetrap', 675: 'moving van', 676: 'muzzle', 677: 'nail', 678: 'neck brace', 679: 'necklace', 680: 'nipple', 681: 'notebook, notebook computer', 682: 'obelisk', 683: 'oboe, hautboy, hautbois', 684: 'ocarina, sweet potato', 685: 'odometer, hodometer, mileometer, milometer', 686: 'oil filter', 687: 'organ, pipe organ', 688: 'oscilloscope, scope, cathode-ray oscilloscope, CRO', 689: 'overskirt', 690: 'oxcart', 691: 'oxygen mask', 692: 'packet', 693: 'paddle, boat paddle', 694: 'paddlewheel, paddle wheel', 695: 'padlock', 696: 'paintbrush', 697: "pajama, pyjama, pj's, jammies", 698: 'palace', 699: 'panpipe, pandean pipe, syrinx', 700: 'paper towel', 701: 'parachute, chute', 702: 'parallel bars, bars', 703: 'park bench', 704: 'parking meter', 705: 'passenger car, coach, carriage', 706: 'patio, terrace', 707: 'pay-phone, pay-station', 708: 'pedestal, plinth, footstall', 709: 'pencil box, pencil case', 710: 'pencil sharpener', 711: 'perfume, essence', 712: 'Petri dish', 713: 'photocopier', 714: 'pick, plectrum, plectron', 715: 'pickelhaube', 716: 'picket fence, paling', 717: 'pickup, pickup truck', 718: 'pier', 719: 'piggy bank, penny bank', 720: 'pill bottle', 721: 'pillow', 722: 'ping-pong ball', 723: 'pinwheel', 724: 'pirate, pirate ship', 725: 'pitcher, ewer', 726: "plane, carpenter's plane, woodworking plane", 727: 'planetarium', 728: 'plastic bag', 729: 'plate rack', 730: 'plow, plough', 731: "plunger, plumber's helper", 732: 'Polaroid camera, Polaroid Land camera', 733: 'pole', 734: 'police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria', 735: 'poncho', 736: 'pool table, billiard table, snooker table', 737: 'pop bottle, soda bottle', 738: 'pot, flowerpot', 739: "potter's wheel", 740: 'power drill', 741: 'prayer rug, prayer mat', 742: 'printer', 743: 'prison, prison house', 744: 'projectile, missile', 745: 'projector', 746: 'puck, hockey puck', 747: 'punching bag, punch bag, punching ball, punchball', 748: 'purse', 749: 'quill, quill pen', 750: 'quilt, comforter, comfort, puff', 751: 'racer, race car, racing car', 752: 'racket, racquet', 753: 'radiator', 754: 'radio, wireless', 755: 'radio telescope, radio reflector', 756: 'rain barrel', 757: 'recreational vehicle, RV, R.V.', 758: 'reel', 759: 'reflex camera', 760: 'refrigerator, icebox', 761: 'remote control, remote', 762: 'restaurant, eating house, eating place, 
eatery', 763: 'revolver, six-gun, six-shooter', 764: 'rifle', 765: 'rocking chair, rocker', 766: 'rotisserie', 767: 'rubber eraser, rubber, pencil eraser', 768: 'rugby ball', 769: 'rule, ruler', 770: 'running shoe', 771: 'safe', 772: 'safety pin', 773: 'saltshaker, salt shaker', 774: 'sandal', 775: 'sarong', 776: 'sax, saxophone', 777: 'scabbard', 778: 'scale, weighing machine', 779: 'school bus', 780: 'schooner', 781: 'scoreboard', 782: 'screen, CRT screen', 783: 'screw', 784: 'screwdriver', 785: 'seat belt, seatbelt', 786: 'sewing machine', 787: 'shield, buckler', 788: 'shoe shop, shoe-shop, shoe store', 789: 'shoji', 790: 'shopping basket', 791: 'shopping cart', 792: 'shovel', 793: 'shower cap', 794: 'shower curtain', 795: 'ski', 796: 'ski mask', 797: 'sleeping bag', 798: 'slide rule, slipstick', 799: 'sliding door', 800: 'slot, one-armed bandit', 801: 'snorkel', 802: 'snowmobile', 803: 'snowplow, snowplough', 804: 'soap dispenser', 805: 'soccer ball', 806: 'sock', 807: 'solar dish, solar collector, solar furnace', 808: 'sombrero', 809: 'soup bowl', 810: 'space bar', 811: 'space heater', 812: 'space shuttle', 813: 'spatula', 814: 'speedboat', 815: "spider web, spider's web", 816: 'spindle', 817: 'sports car, sport car', 818: 'spotlight, spot', 819: 'stage', 820: 'steam locomotive', 821: 'steel arch bridge', 822: 'steel drum', 823: 'stethoscope', 824: 'stole', 825: 'stone wall', 826: 'stopwatch, stop watch', 827: 'stove', 828: 'strainer', 829: 'streetcar, tram, tramcar, trolley, trolley car', 830: 'stretcher', 831: 'studio couch, day bed', 832: 'stupa, tope', 833: 'submarine, pigboat, sub, U-boat', 834: 'suit, suit of clothes', 835: 'sundial', 836: 'sunglass', 837: 'sunglasses, dark glasses, shades', 838: 'sunscreen, sunblock, sun blocker', 839: 'suspension bridge', 840: 'swab, swob, mop', 841: 'sweatshirt', 842: 'swimming trunks, bathing trunks', 843: 'swing', 844: 'switch, electric switch, electrical switch', 845: 'syringe', 846: 'table lamp', 847: 'tank, army tank, armored combat vehicle, armoured combat vehicle', 848: 'tape player', 849: 'teapot', 850: 'teddy, teddy bear', 851: 'television, television system', 852: 'tennis ball', 853: 'thatch, thatched roof', 854: 'theater curtain, theatre curtain', 855: 'thimble', 856: 'thresher, thrasher, threshing machine', 857: 'throne', 858: 'tile roof', 859: 'toaster', 860: 'tobacco shop, tobacconist shop, tobacconist', 861: 'toilet seat', 862: 'torch', 863: 'totem pole', 864: 'tow truck, tow car, wrecker', 865: 'toyshop', 866: 'tractor', 867: 'trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi', 868: 'tray', 869: 'trench coat', 870: 'tricycle, trike, velocipede', 871: 'trimaran', 872: 'tripod', 873: 'triumphal arch', 874: 'trolleybus, trolley coach, trackless trolley', 875: 'trombone', 876: 'tub, vat', 877: 'turnstile', 878: 'typewriter keyboard', 879: 'umbrella', 880: 'unicycle, monocycle', 881: 'upright, upright piano', 882: 'vacuum, vacuum cleaner', 883: 'vase', 884: 'vault', 885: 'velvet', 886: 'vending machine', 887: 'vestment', 888: 'viaduct', 889: 'violin, fiddle', 890: 'volleyball', 891: 'waffle iron', 892: 'wall clock', 893: 'wallet, billfold, notecase, pocketbook', 894: 'wardrobe, closet, press', 895: 'warplane, military plane', 896: 'washbasin, handbasin, washbowl, lavabo, wash-hand basin', 897: 'washer, automatic washer, washing machine', 898: 'water bottle', 899: 'water jug', 900: 'water tower', 901: 'whiskey jug', 902: 'whistle', 903: 'wig', 904: 'window screen', 905: 'window shade', 906: 'Windsor tie', 
907: 'wine bottle', 908: 'wing', 909: 'wok', 910: 'wooden spoon', 911: 'wool, woolen, woollen', 912: 'worm fence, snake fence, snake-rail fence, Virginia fence', 913: 'wreck', 914: 'yawl', 915: 'yurt', 916: 'web site, website, internet site, site', 917: 'comic book', 918: 'crossword puzzle, crossword', 919: 'street sign', 920: 'traffic light, traffic signal, stoplight', 921: 'book jacket, dust cover, dust jacket, dust wrapper', 922: 'menu', 923: 'plate', 924: 'guacamole', 925: 'consomme', 926: 'hot pot, hotpot', 927: 'trifle', 928: 'ice cream, icecream', 929: 'ice lolly, lolly, lollipop, popsicle', 930: 'French loaf', 931: 'bagel, beigel', 932: 'pretzel', 933: 'cheeseburger', 934: 'hotdog, hot dog, red hot', 935: 'mashed potato', 936: 'head cabbage', 937: 'broccoli', 938: 'cauliflower', 939: 'zucchini, courgette', 940: 'spaghetti squash', 941: 'acorn squash', 942: 'butternut squash', 943: 'cucumber, cuke', 944: 'artichoke, globe artichoke', 945: 'bell pepper', 946: 'cardoon', 947: 'mushroom', 948: 'Granny Smith', 949: 'strawberry', 950: 'orange', 951: 'lemon', 952: 'fig', 953: 'pineapple, ananas', 954: 'banana', 955: 'jackfruit, jak, jack', 956: 'custard apple', 957: 'pomegranate', 958: 'hay', 959: 'carbonara', 960: 'chocolate sauce, chocolate syrup', 961: 'dough', 962: 'meat loaf, meatloaf', 963: 'pizza, pizza pie', 964: 'potpie', 965: 'burrito', 966: 'red wine', 967: 'espresso', 968: 'cup', 969: 'eggnog', 970: 'alp', 971: 'bubble', 972: 'cliff, drop, drop-off', 973: 'coral reef', 974: 'geyser', 975: 'lakeside, lakeshore', 976: 'promontory, headland, head, foreland', 977: 'sandbar, sand bar', 978: 'seashore, coast, seacoast, sea-coast', 979: 'valley, vale', 980: 'volcano', 981: 'ballplayer, baseball player', 982: 'groom, bridegroom', 983: 'scuba diver', 984: 'rapeseed', 985: 'daisy', 986: "yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum", 987: 'corn', 988: 'acorn', 989: 'hip, rose hip, rosehip', 990: 'buckeye, horse chestnut, conker', 991: 'coral fungus', 992: 'agaric', 993: 'gyromitra', 994: 'stinkhorn, carrion fungus', 995: 'earthstar', 996: 'hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa', 997: 'bolete', 998: 'ear, spike, capitulum', 999: 'toilet tissue, toilet paper, bathroom tissue'} ================================================ FILE: examples/ONNX/resnet50/int8.py ================================================ import calibrator import tensorrt as trt # Use TensorRT ONNX parser to parse model file, and enable INT8 calibration during engine construction def build_int8_engine_onnx(model_file, image_dir, batch_size, calibration_batches, engine_file, cache_file='INT8CalibrationTable'): TRT_LOGGER = trt.Logger(trt.Logger.WARNING) with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser: # Load the Onnx model and parse it in order to populate the TensorRT network. 
with open(model_file, 'rb') as model: parser.parse(model.read()) # Allow builder to use INT8 or FP16 kernels when building engine builder.int8_mode = True builder.fp16_mode = True calib = calibrator.ONNXEntropyCalibrator(image_dir, batch_size, calibration_batches, cache_file) builder.int8_calibrator = calib builder.max_batch_size = batch_size engine = builder.build_cuda_engine(network) with open(engine_file, 'wb') as f: f.write(engine.serialize()) ================================================ FILE: examples/ONNX/resnet50/onnx_utils.py ================================================ #!/usr/bin/env python3 import glob import os import onnx from onnx import numpy_helper from matplotlib import pyplot as plt import numpy as np def load_inputs(test_data_dir): # Load inputs inputs = [] inputs_num = len(glob.glob(os.path.join(test_data_dir, 'input_*.pb'))) for i in range(inputs_num): input_file = os.path.join(test_data_dir, 'input_{}.pb'.format(i)) tensor = onnx.TensorProto() with open(input_file, 'rb') as f: tensor.ParseFromString(f.read()) inputs.append(numpy_helper.to_array(tensor)) return inputs def load_outputs(test_data_dir): # Load reference outputs ref_outputs = [] ref_outputs_num = len(glob.glob(os.path.join(test_data_dir, 'output_*.pb'))) for i in range(ref_outputs_num): output_file = os.path.join(test_data_dir, 'output_{}.pb'.format(i)) tensor = onnx.TensorProto() with open(output_file, 'rb') as f: tensor.ParseFromString(f.read()) ref_outputs.append(numpy_helper.to_array(tensor)) return ref_outputs def mnist_image(data): two_d = (np.reshape(data, (28, 28))).astype(np.uint8) plt.imshow(two_d, interpolation='nearest') return plt def softmax(x): """Compute softmax values for each sets of scores in x.""" e_x = np.exp(x - np.max(x)) return e_x / e_x.sum() ================================================ FILE: examples/ONNX/resnet50/open_source_images.md5 ================================================ 6cd502bc217f3960cf34447ec4ede610 open_source_images.tar.gz ================================================ FILE: examples/ONNX/resnet50/resnet50.md5 ================================================ 0e8088c7b1a1a9b2d0a5ae05601cc55e resnet50.tar.gz ================================================ FILE: examples/ONNX/resnet50/run_jpeg_test.py ================================================ #!/usr/bin/env python3 import os import time import trtlab import onnx_utils as utils import numpy as np import matplotlib.pyplot as plt import mxnet as mx from mxnet.gluon.data.vision import transforms from imagenet_labels import labels import click tests = {} def tensorrt_init(engines): manager = trtlab.InferenceManager(max_exec_concurrency=4) runners = [] for engine in engines: name, _ = os.path.splitext(os.path.basename(engine)) runners.append(manager.register_tensorrt_engine(name, engine)) manager.update_resources() return runners def infer_image(runner, image): inputs = preprocess_image(runner, image) future = runner.infer(**inputs) result = future.get() for name, tensor in result.items(): tensor = tensor.reshape(1000) idx = np.argmax(tensor) print("\n*** Results ***") print(labels[idx], tensor[idx]) print("") def preprocess_image(runner, image_path): inputs = runner.input_bindings() keys = list(inputs.keys()) input_name = keys[0] img = np.array(plt.imread(image_path)) img = transform_image(img) return { input_name: img } def transform_image(img): transform_fn = transforms.Compose([ transforms.Resize(224), transforms.CenterCrop(224), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], 
[0.229, 0.224, 0.225]) ]) img = transform_fn(mx.nd.array(img)).asnumpy() img = np.expand_dims(img, axis=0) # batchify return img def validate_results(computed, expected): keys = list(computed.keys()) output_name = keys[0] output_value = computed[output_name] np.testing.assert_almost_equal(output_value, expected[0], decimal=3) print("-- Test Passed: All outputs {} match within 3 decimals".format(output_value.shape)) File = click.Path(exists=True, file_okay=True, dir_okay=False, resolve_path=True) Path = click.Path(exists=True, file_okay=False, dir_okay=True, resolve_path=True) @click.command() @click.option("--image", type=File, multiple=True) @click.argument("engine", type=File, nargs=1) def main(engine, image): runners = tensorrt_init([engine]) for runner in runners: for img in image: infer_image(runner, img) if __name__ == "__main__": main() ================================================ FILE: examples/ONNX/resnet50/run_onnx_tests.py ================================================ #!/usr/bin/env python3 import os import trtlab import numpy as np import click import onnx_utils as utils tests = {} def tensorrt_init(engines): manager = trtlab.InferenceManager(max_exec_concurrency=4) runners = [] for engine in engines: name, _ = os.path.splitext(os.path.basename(engine)) runners.append(manager.register_tensorrt_engine(name, engine)) manager.update_resources() return runners def test_data(test_path): for path, dirs, files in os.walk(test_path): if os.path.basename(path).startswith("test_"): tests[path] = files for path, files in tests.items(): inputs = utils.load_inputs(path) outputs = utils.load_outputs(path) print("** Testing {} **".format(path)) yield inputs, outputs def run_test(runner, inputs, outputs): inputs = preprocess_inputs(runner, inputs) future = runner.infer(**inputs) result = future.get() validate_results(result, outputs) def preprocess_inputs(runner, inputs): expected_input = runner.input_bindings() if len(expected_input) != len(inputs): raise RuntimeError("mismatched number of inputs") keys = list(expected_input.keys()) input_name = keys[0] info = expected_input[keys[0]] shape = info['shape'] tensor = inputs[0] batch_size = tensor.shape[0] if list(shape) != list(tensor.shape[1:]): raise RuntimeError("mismatched input dimensions") return { input_name: tensor } def validate_results(computed, expected): keys = list(computed.keys()) output_name = keys[0] output_value = computed[output_name] np.testing.assert_almost_equal(output_value, expected[0], decimal=3) print("-- Test Passed: All outputs {} match within 3 decimals".format(output_value.shape)) File = click.Path(exists=True, file_okay=True, dir_okay=False, resolve_path=True) Path = click.Path(exists=True, file_okay=False, dir_okay=True, resolve_path=True) @click.command() @click.option("--tests", type=Path, default="resnet50") @click.argument("engine", type=File, nargs=1) def main(engine, tests): runners = tensorrt_init([engine]) for runner in runners: for inputs, outputs in test_data(tests): run_test(runner, inputs, outputs) if __name__ == "__main__": main() ================================================ FILE: examples/nvRPC/CMakeLists.txt ================================================ # Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of NVIDIA CORPORATION nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add_subdirectory(UnaryService) add_subdirectory(StreamingService) add_subdirectory(SharedMemoryService) # TODO: WIP # add_subdirectory(StreamingInOrderSendRecv) ================================================ FILE: examples/nvRPC/SharedMemoryService/CMakeLists.txt ================================================ # Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of NVIDIA CORPORATION nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
add_executable(sysv-nvrpc.x server.cc)
target_link_libraries(sysv-nvrpc.x
    nvrpc
    echo-protos
    gflags
)

add_executable(sysv-client.x client.cc)
target_link_libraries(sysv-client.x
    nvrpc
    echo-protos
    gflags
)


================================================
FILE: examples/nvRPC/SharedMemoryService/README.md
================================================
# Shared Memory Service

Client/Server service extending the basic nvRPC example.

The client (`sysv-client.x`) creates a `CyclicAllocator` from which it allocates
buffers of shared memory. The client:

1. Writes some data into shared memory (`batch_id` and `0xDEADBEEF` into `data[0]`
   and `data[1]`, respectively).
2. Packs the RPC message with the `batch_id` and the SystemV memory descriptor details.
3. Initiates the RPC.
4. Checks the value at the address in which it wrote `0xDEADBEEF` to ensure that the
   server wrote its response to the proper location.

On receipt of a client RPC, the server acquires a `Descriptor` to the offset in the
shared memory segment specified in the RPC. This is done via the
`ExternalSharedMemoryManager`. The server:

1. Acquires the `Descriptor` to the offset.
2. Checks the values at `data[0]` and `data[1]` for `batch_id` and `0xDEADBEEF`,
   respectively.
3. Writes the `batch_id` into the `[1]` element.
4. Returns the Response.
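The server side of this example never allocates shared memory; it only attaches to
segments the client created. For readers unfamiliar with System V shared memory, here
is a minimal sketch of the raw calls such an attach-and-offset scheme wraps. This is
illustration only: the names are ad hoc, and the trtlab `SystemV` class encapsulates
this machinery.

```c++
// Illustration only: raw System V shared-memory attach, size query, and
// offset arithmetic. Not the trtlab implementation.
#include <sys/ipc.h>
#include <sys/shm.h>
#include <cstddef>
#include <stdexcept>

struct AttachedSegment
{
    void*  base;
    size_t size;

    explicit AttachedSegment(int shm_id)
    {
        base = shmat(shm_id, nullptr, 0); // map an existing segment into this process
        if(base == (void*)-1) { throw std::runtime_error("shmat failed"); }
        shmid_ds ds{};
        shmctl(shm_id, IPC_STAT, &ds); // query the true segment size
        size = ds.shm_segsz;
    }

    ~AttachedSegment() { shmdt(base); } // detach; the segment itself persists

    // A "descriptor" is effectively (base + offset, length) into the segment
    void* at(size_t offset) { return static_cast<char*>(base) + offset; }
};
```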
================================================
FILE: examples/nvRPC/SharedMemoryService/client.cc
================================================
/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <chrono>
#include <iostream>
#include <memory>
#include <string>

#include <glog/logging.h>
#include <gflags/gflags.h>

#include <grpcpp/grpcpp.h>

#include "tensorrt/laboratory/core/memory/cyclic_allocator.h"
#include "tensorrt/laboratory/core/memory/system_v.h"

#include "echo.grpc.pb.h"

using grpc::Channel;
using grpc::ClientContext;
using grpc::Status;

using simple::Inference;
using simple::Input;
using simple::Output;

using trtlab::CyclicAllocator;
using trtlab::SystemV;

static constexpr size_t one_mb = 1024 * 1024;

DEFINE_int32(count, 1, "number of grpc messages to send");

class SimpleClient final
{
  public:
    SimpleClient(std::shared_ptr<Channel> channel)
        : m_Stub(Inference::NewStub(channel)), m_Memory(5, one_mb)
    {
    }

    // Generate and send RPC message
    int Compute(const int batch_id)
    {
        // Allocate some SysV shared memory from the CyclicAllocator
        CyclicAllocator<SystemV>::Descriptor mdesc = RandomAllocation();

        // Populate the request object
        Input request;
        request.set_batch_id(batch_id);
        auto sysv = request.mutable_sysv();
        sysv->set_shm_id(mdesc->Stack().Memory().ShmID());
        sysv->set_offset(mdesc->Offset());
        sysv->set_size(mdesc->Size());

        // Write the batch_id to the shared memory segment
        // This will be validated against the batch_id in the message body on the server
        auto data = mdesc->CastToArray<size_t>();
        data[0] = batch_id;
        data[1] = 0xDEADBEEF;

        // Container for the data we expect from the server.
        Output reply;

        // Context for the client. It could be used to convey extra information to
        // the server and/or tweak certain RPC behaviors.
        ClientContext context;

        // The actual RPC.
        Status status = m_Stub->Compute(&context, request, &reply);
        if(status.ok())
        {
            CHECK_EQ(data[1], batch_id);
            return reply.batch_id();
        }
        else
        {
            LOG(ERROR) << status.error_code() << ": " << status.error_message();
            return -1;
        }
    }

  private:
    CyclicAllocator<SystemV>::Descriptor RandomAllocation()
    {
        size_t bytes = rand() % (m_Memory.MaxAllocationSize() / 4);
        bytes = std::max(bytes, 16UL); // guarantee at least 16 bytes (2x size_t)
        DLOG(INFO) << "RandomAllocation: " << bytes << " bytes";
        return m_Memory.Allocate(bytes);
    }

    std::unique_ptr<Inference::Stub> m_Stub;
    CyclicAllocator<SystemV> m_Memory;
};

int main(int argc, char** argv)
{
    FLAGS_alsologtostderr = 1; // It will dump to console
    ::google::ParseCommandLineFlags(&argc, &argv, true);

    SimpleClient client(
        grpc::CreateChannel("localhost:50051", grpc::InsecureChannelCredentials()));

    auto start = std::chrono::steady_clock::now();
    for(int i = 0; i < FLAGS_count; i++)
    {
        auto reply = client.Compute(i);
        LOG_IF(INFO, reply == -1) << "BatchId received: " << reply;
    }
    auto end = std::chrono::steady_clock::now();
    float elapsed = std::chrono::duration<float>(end - start).count();
    std::cout << FLAGS_count << " requests in " << elapsed << " seconds" << std::endl;

    return 0;
}
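The client above carves each request's buffer out of a `CyclicAllocator<SystemV>`. As
a rough mental model only (this is not trtlab's implementation, and `RingCarver` is a
made-up name): a cyclic allocator hands out offsets from a fixed segment and wraps to
the front when it reaches the end; the real class additionally rotates across
reference-counted memory stacks so in-flight descriptors keep their backing segment
alive.

```c++
// Sketch of the cyclic-allocation idea; assumes allocations fit in the segment.
#include <cstddef>

class RingCarver
{
  public:
    RingCarver(char* base, size_t capacity) : m_Base(base), m_Capacity(capacity) {}

    // Returns a pointer for `bytes` and reports its offset; wraps when the
    // remaining tail of the segment is too small.
    char* Allocate(size_t bytes, size_t& offset)
    {
        if(m_Head + bytes > m_Capacity) { m_Head = 0; } // wrap to the front
        offset = m_Head;
        m_Head += bytes;
        return m_Base + offset;
    }

  private:
    char*  m_Base;
    size_t m_Capacity;
    size_t m_Head = 0;
};
```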
================================================
FILE: examples/nvRPC/SharedMemoryService/server.cc
================================================
/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <chrono>
#include <map>
#include <memory>
#include <mutex>

#include <glog/logging.h>
#include <gflags/gflags.h>

#include "nvrpc/executor.h"
#include "nvrpc/server.h"
#include "nvrpc/service.h"
#include "tensorrt/laboratory/core/memory/descriptor.h"
#include "tensorrt/laboratory/core/memory/system_v.h"
#include "tensorrt/laboratory/core/pool.h"
#include "tensorrt/laboratory/core/resources.h"
#include "tensorrt/laboratory/core/thread_pool.h"

#include "echo.grpc.pb.h"
#include "echo.pb.h"

using nvrpc::AsyncRPC;
using nvrpc::AsyncService;
using nvrpc::Context;
using nvrpc::Executor;
using nvrpc::Server;
using trtlab::Resources;
using trtlab::ThreadPool;
using trtlab::Descriptor;
using trtlab::SystemV;

// CLI Options
DEFINE_int32(thread_count, 1, "Size of thread pool");

/**
 * @brief SystemV Memory Manager
 *
 * This object does not allocate system v shared memory segments. Instead, it attaches and manages
 * descriptors into shared memory segments allocated by an external source.
 */
class ExternalSharedMemoryManager final
{
    class PartialSegmentDescriptor final : public Descriptor<SystemV>
    {
      public:
        PartialSegmentDescriptor(const std::shared_ptr<SystemV>& segment, size_t offset,
                                 size_t size)
            : Descriptor<SystemV>((*segment)[offset], size, "PartialSysVSegment"),
              m_Segment(segment)
        {
        }

        PartialSegmentDescriptor(PartialSegmentDescriptor&& other)
            : Descriptor<SystemV>(std::move(other)),
              m_Segment{std::exchange(other.m_Segment, nullptr)}
        {
        }

        PartialSegmentDescriptor& operator=(PartialSegmentDescriptor&&) = delete;
        DELETE_COPYABILITY(PartialSegmentDescriptor);

        virtual ~PartialSegmentDescriptor() override {}

      private:
        std::shared_ptr<SystemV> m_Segment;
    };

  public:
    ExternalSharedMemoryManager() = default;
    using Descriptor = std::unique_ptr<PartialSegmentDescriptor>;

    Descriptor Acquire(size_t shm_id, size_t offset, size_t size)
    {
        const auto& segment = GetOrAttachToShmID(shm_id);
        CHECK_LE(offset + size, segment->Size());
        return std::make_unique<PartialSegmentDescriptor>(segment, offset, size);
    }

    void Release(size_t shm_id)
    {
        std::lock_guard<std::mutex> l(m_Mutex);
        auto count = m_AttachedSegments.erase(shm_id);
        DLOG_IF(WARNING, count == 0) << "Attempting to Release an unmapped shm_id";
    }

  protected:
    std::shared_ptr<SystemV> GetOrAttachToShmID(size_t shm_id)
    {
        std::shared_ptr<SystemV> segment;
        std::lock_guard<std::mutex> l(m_Mutex);
        auto search = m_AttachedSegments.find(shm_id);
        if(search == m_AttachedSegments.end())
        {
            DLOG(INFO) << "SystemV Manager: attaching to shm_id: " << shm_id;
            segment = SystemV::Attach(shm_id);
            m_AttachedSegments[shm_id] = segment;
        }
        else
        {
            segment = search->second;
        }
        return segment;
    }

  private:
    std::map<size_t, std::shared_ptr<SystemV>> m_AttachedSegments;
    std::mutex m_Mutex;
};

struct SimpleResources : public Resources
{
    SimpleResources() = default;

    ExternalSharedMemoryManager& GetExternalSharedMemoryManager()
    {
        return m_ExternalSharedMemoryManager;
    }

  private:
    ExternalSharedMemoryManager m_ExternalSharedMemoryManager;
};

class SimpleContext final : public Context<simple::Input, simple::Output, SimpleResources>
{
    void ExecuteRPC(RequestType& input, ResponseType& output) final override
    {
        ExternalSharedMemoryManager::Descriptor mdesc;
        if(input.has_sysv())
        {
            mdesc = GetResources()->GetExternalSharedMemoryManager().Acquire(
                input.sysv().shm_id(), input.sysv().offset(), input.sysv().size());
        }
        CHECK(mdesc);
        auto array = mdesc->CastToArray<size_t>();
        CHECK_EQ(array[0], input.batch_id());
        CHECK_EQ(array[1], 0xDEADBEEF);
        array[1] = input.batch_id();
        output.set_batch_id(input.batch_id());
        this->FinishResponse();
    }
};

int main(int argc, char* argv[])
{
    FLAGS_alsologtostderr = 1; // Log to console
    ::google::InitGoogleLogging("simpleServer");
    ::google::ParseCommandLineFlags(&argc, &argv, true);

    // A server will bind an IP:PORT to listen on
    Server server("0.0.0.0:50051");

    // A server can host multiple services
    LOG(INFO) << "Register Service (simple::Inference) with Server";
    auto simpleInference = server.RegisterAsyncService<simple::Inference::AsyncService>();

    LOG(INFO) << "Register RPC (simple::Inference::Compute) with Service (simple::Inference)";
    auto rpcCompute = simpleInference->RegisterRPC<SimpleContext>(
        &simple::Inference::AsyncService::RequestCompute);

    LOG(INFO) << "Initializing Resources for RPC (simple::Inference::Compute)";
    auto rpcResources = std::make_shared<SimpleResources>();

    LOG(INFO) << "Creating Executor";
    auto executor = server.RegisterExecutor(new Executor(1));

    LOG(INFO) << "Creating Execution Contexts for RPC (simple::Inference::Compute) with Executor";
    executor->RegisterContexts(rpcCompute, rpcResources, 10);

    LOG(INFO) << "Running Server";
    server.Run(std::chrono::milliseconds(2000), [] {});
}
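The manager above caches one attachment per `shm_id` and hands out lightweight
descriptors into it. A hedged sketch of driving it outside the RPC path follows;
`HandleRequest` is a hypothetical helper, and in the real server the `shm_id` and
`offset` arrive in the RPC message, as in `SimpleContext::ExecuteRPC` above.

```c++
// Hypothetical standalone usage of ExternalSharedMemoryManager (names from the
// file above; this helper is not part of the example's build).
void HandleRequest(ExternalSharedMemoryManager& mgr, size_t shm_id, size_t offset)
{
    auto mdesc = mgr.Acquire(shm_id, offset, 2 * sizeof(size_t));
    auto array = mdesc->CastToArray<size_t>();
    array[1] = array[0];    // echo the batch_id back through shared memory
    // mgr.Release(shm_id); // optionally drop the cached attachment when done
}
```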
================================================
FILE: examples/nvRPC/StreamingInOrderSendRecv/CMakeLists.txt
================================================
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

add_executable(nvrpc-bidirectional-server.x server.cc)
target_link_libraries(nvrpc-bidirectional-server.x
    nvrpc
    echo-protos
    gflags
)

add_executable(nvrpc-bidirectional-client.x
    client.cc
)
target_link_libraries(nvrpc-bidirectional-client.x
    nvrpc
    nvrpc-client
    echo-protos
    gflags
)


================================================
FILE: examples/nvRPC/StreamingInOrderSendRecv/README.md
================================================
# BidirectionalStream In-Order Send/Recv

```
rpc InOrderSendRecv (stream Request) returns (stream Response)
```

The service will accept a stream of Requests, queue them for in-order execution via
the `ExecuteRPC` virtual method, and for each result of `ExecuteRPC` return a
Response on the stream. Only one `ExecuteRPC` call can be in flight at any time,
which allows the RPC to optionally maintain state. In some regards, this lifecycle
could be used to model an RNN, as the collective history can be built into the
resources.


================================================
FILE: examples/nvRPC/StreamingInOrderSendRecv/client.cc
================================================
/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include "nvrpc/client/client_streaming.h" #include "nvrpc/client/executor.h" using grpc::Channel; using grpc::ClientContext; using grpc::Status; using nvrpc::client::ClientBidirectional; using nvrpc::client::Executor; #include "echo.grpc.pb.h" using simple::Inference; using simple::Input; using simple::Output; DEFINE_int32(count, 100, "number of grpc messages to send"); DEFINE_int32(thread_count, 1, "Size of thread pool"); DEFINE_string(hostname, "127.0.0.1:50051", "hostname and port"); int main(int argc, char** argv) { // Instantiate the client. It requires a channel, out of which the actual RPCs // are created. This channel models a connection to an endpoint (in this case, // localhost at port 50051). We indicate that the channel isn't authenticated // (use of InsecureChannelCredentials()). FLAGS_alsologtostderr = 1; // It will dump to console ::google::ParseCommandLineFlags(&argc, &argv, true); std::mutex mutex; std::size_t count = 0; auto executor = std::make_shared(FLAGS_thread_count); auto channel = grpc::CreateChannel(FLAGS_hostname, grpc::InsecureChannelCredentials()); auto stub = Inference::NewStub(channel); auto infer_prepare_fn = [&stub](::grpc::ClientContext * context, ::grpc::CompletionQueue * cq) -> auto { return std::move(stub->PrepareAsyncBidirectional(context, cq)); }; auto stream = std::make_unique>( infer_prepare_fn, executor, [](Input&& request) { LOG_FIRST_N(INFO, 10) << "Sent Request with BatchID: " << request.batch_id(); static size_t last = 0; CHECK_EQ(last + 1, request.batch_id()); ++last; // CHECK(request.batch_id()); }, [&mutex, &count](Output&& response) { LOG_FIRST_N(INFO, 10) << "Received Response with BatchID: " << response.batch_id(); // CHECK(response.batch_id()); std::lock_guard lock(mutex); --count; }); auto start = std::chrono::steady_clock::now(); auto elapsed = [start]() -> float { return std::chrono::duration(std::chrono::steady_clock::now() - start).count(); }; for(int i = 1; i < FLAGS_count + 1; i++) { { std::lock_guard lock(mutex); ++count; } Input input; input.set_batch_id(i); stream->Send(std::move(input)); } std::cout << FLAGS_count << " queued in " << elapsed() << "seconds" << std::endl; auto future = stream->Done(); auto status = future.get(); executor->ShutdownAndJoin(); CHECK_EQ(count, 0UL); std::cout << FLAGS_count << " completed in " << elapsed() << "seconds" << std::endl; std::cout << "gRPC Status: " << (status.ok() ? "OK" : "NOT OK") << std::endl; return 0; } ================================================ FILE: examples/nvRPC/StreamingInOrderSendRecv/server.cc ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. 
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <chrono>
#include <memory>
#include <thread>

#include <glog/logging.h>
#include <gflags/gflags.h>

#include "tensorrt/laboratory/core/pool.h"
#include "tensorrt/laboratory/core/resources.h"
#include "tensorrt/laboratory/core/thread_pool.h"

using trtlab::Resources;
using trtlab::ThreadPool;

#include "nvrpc/executor.h"
#include "nvrpc/server.h"
#include "nvrpc/service.h"

using nvrpc::AsyncRPC;
using nvrpc::AsyncService;
using nvrpc::BidirectionalContext;
using nvrpc::Executor;
using nvrpc::Server;

#include "echo.grpc.pb.h"
#include "echo.pb.h"

// CLI Options
DEFINE_int32(thread_count, 1, "Size of thread pool");

// Define the resources your RPC will need to execute
// ==================================================
// In this case, all simple::Inference::Compute RPCs share a threadpool on which they
// queue up some work. This essentially means, after the message has been received and
// processed, the actual work for the RPC is pushed to a worker pool outside the scope of
// the transaction processing system (TPS). This is essentially async computing: we have
// decoupled the transaction from the workers executing the implementation. The TPS can
// continue to queue work, while the workers process the load.
struct SimpleResources : public Resources
{
    SimpleResources(int numThreadsInPool = 3) : m_ThreadPool(numThreadsInPool)
    {
        LOG(INFO) << "Server ThreadCount: " << numThreadsInPool;
    }

    ThreadPool& AcquireThreadPool() { return m_ThreadPool; }

  private:
    ThreadPool m_ThreadPool;
};

// Contexts hold the state and provide the definition of the work to be performed by the RPC.
// This is where you define what gets executed for a given RPC.
// Incoming Message = simple::Input (RequestType)
// Outgoing Message = simple::Output (ResponseType)
class SimpleContext final
    : public BidirectionalContext<simple::Input, simple::Output, SimpleResources>
{
    void ExecuteRPC(RequestType& input, ResponseType& output) final override
    {
        // We could do work here, but we'd block the TPS, i.e. the threads pulling messages
        // off the incoming receive queue. Very quick responses are best done here; however,
        // longer running workloads should be offloaded so the TPS can avoid being blocked.

        // GetResources()->AcquireThreadPool().enqueue([this, &input, &output]{
        // Now running on a worker thread of the ThreadPool defined in SimpleResources.

        // Here we are just echoing back the incoming batch_id; however, in later
        // examples, we'll show how to run an async cuda pipeline.
        LOG_FIRST_N(INFO, 10) << "BatchID: " << input.batch_id() << " Tag = " << Tag()
                              << " Thread = " << std::this_thread::get_id();
        output.set_batch_id(input.batch_id());
        this->FinishResponse();
        // });

        // The TPS thread is now free to continue processing messages - async ftw!
    }
};

DEFINE_string(ip_port, "0.0.0.0:50051", "IP/Port");

int main(int argc, char* argv[])
{
    FLAGS_alsologtostderr = 1; // Log to console
    ::google::InitGoogleLogging("simpleServer");
    ::google::ParseCommandLineFlags(&argc, &argv, true);

    // A server will bind an IP:PORT to listen on
    Server server(FLAGS_ip_port);

    // A server can host multiple services
    LOG(INFO) << "Register Service (simple::Inference) with Server";
    auto simpleInference = server.RegisterAsyncService<simple::Inference::AsyncService>();

    // An RPC has two components that need to be specified when registering with the service:
    //  1) Type of Execution Context (SimpleContext). The execution context defines the behavior
    //     of the RPC, i.e. it contains the control logic for the execution of the RPC.
    //  2) The Request function (RequestBidirectional) which was generated by gRPC when compiling
    //     the protobuf which defined the service. This function is responsible for queuing the
    //     RPC's execution context to the gRPC completion queue.
    LOG(INFO) << "Register RPC (simple::Inference::Compute) with Service (simple::Inference)";
    auto rpcCompute = simpleInference->RegisterRPC<SimpleContext>(
        &simple::Inference::AsyncService::RequestBidirectional);

    LOG(INFO) << "Initializing Resources for RPC (simple::Inference::Compute)";
    auto rpcResources = std::make_shared<SimpleResources>(FLAGS_thread_count);

    // Create Executors - Executors provide the message processing resources for the RPCs.
    // Multiple Executors can be registered with a Server. The executor is responsible
    // for pulling incoming messages off the receive queue and executing the associated
    // context. By default, an executor only uses a single thread. A typical use case is
    // an Executor executing a context which immediately pushes the work to a thread pool.
    // However, for very low-latency messaging, you might want to use a multi-threaded
    // Executor and a Blocking Context - meaning the Context performs the entire RPC function
    // on the Executor's thread.
    LOG(INFO) << "Creating Executor";
    auto executor = server.RegisterExecutor(new Executor(1));

    // You can register RPC execution contexts from any registered RPC on any executor.
    // The power of that will become clear in later examples. For now, we will register
    // 10 instances of the simple::Inference::Compute RPC's SimpleContext execution context
    // with the Executor.
    LOG(INFO) << "Creating Execution Contexts for RPC (simple::Inference::Compute) with Executor";
    executor->RegisterContexts(rpcCompute, rpcResources, 10);

    LOG(INFO) << "Running Server";
    server.Run(std::chrono::milliseconds(2000), [] {
        // This is a timeout loop executed every 2 seconds.
        // Run() with no arguments will run an empty timeout loop every 5 seconds.
        // RunAsync() will return immediately; it's your responsibility to ensure the
        // server doesn't go out of scope, or a Shutdown will be triggered on your services.
    });
}
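Because the in-order lifecycle serializes `ExecuteRPC` calls per stream (see the README
above), a context may safely carry state across requests. A minimal sketch, assuming
the same `BidirectionalContext` template signature used by `SimpleContext` above; the
running-sum behavior and class name are purely illustrative, not part of the example.

```c++
// Hedged sketch: a stateful in-order context. Safe only because at most one
// ExecuteRPC is in flight per stream in this lifecycle.
class RunningSumContext final
    : public BidirectionalContext<simple::Input, simple::Output, SimpleResources>
{
    void ExecuteRPC(RequestType& input, ResponseType& output) final override
    {
        m_Sum += input.batch_id();  // accumulate across the stream's requests
        output.set_batch_id(m_Sum); // respond with the running total
        this->FinishResponse();
    }

    // NOTE: contexts are recycled across streams, so a real service would reset
    // m_Sum when a new stream begins (the hook for that is not shown here).
    std::uint64_t m_Sum = 0;
};
```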
================================================
FILE: examples/nvRPC/StreamingInOrderSendRecv/test.sh
================================================
#!/bin/bash

cleanup() {
    kill $(jobs -p) ||:
}
trap "cleanup" EXIT SIGINT SIGTERM

./nvrpc-bidirectional-server.x --ip_port="0.0.0.0:5555" &

f=$(mktemp)
cat <<'EOF' > $f
PS1='nvRPC Bidirectional: '
go() {
    ./nvrpc-bidirectional-client.x --hostname="localhost:5555" --count=${1:-100}
}
EOF

ps aux
echo
echo 'Try ./nvrpc-bidirectional-client.x --hostname="localhost:5555" --count=100'
bash --rcfile <(echo "PS1='nvRPC Bidirectional: '")


================================================
FILE: examples/nvRPC/StreamingService/CMakeLists.txt
================================================
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

set(LIBS nvrpc echo-protos gflags)

add_executable(nvrpc-ping-pong-server.x ping-pong.cc)
add_executable(nvrpc-even-odds-server.x even-odds.cc)

target_link_libraries(nvrpc-ping-pong-server.x ${LIBS})
target_link_libraries(nvrpc-even-odds-server.x ${LIBS})

add_executable(nvrpc-streaming-client.x client.cc)
target_link_libraries(nvrpc-streaming-client.x ${LIBS} nvrpc-client)


================================================
FILE: examples/nvRPC/StreamingService/README.md
================================================
# Streaming Examples

Async gRPC streaming can take on many forms. nvRPC provides a set of LifeCycles to
accommodate a variety of common use-cases. For all examples, the RPC that we will
implement has the same form:

```protobuf
rpc Bidirectional (stream Input) returns (stream Output) {}
```

To implement a `StreamingContext`, you must implement the `RequestReceived` pure
virtual method. This method is triggered once for each incoming request that is read
from the stream. The `ServerStream` object is used to write responses or close the
stream.
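Before the full `ServerStream` contract, here is a minimal sketch of an echoing
`RequestReceived` implementation. The template arguments and the `ServerStream` type
follow the `ping-pong.cc` and `even-odds.cc` examples later in this listing, and the
class name is hypothetical; treat it as illustrative rather than canonical.

```c++
// Hedged sketch: echo each request back on the stream, one response per request.
class EchoContext final : public StreamingContext<simple::Input, simple::Output, SimpleResources>
{
    void RequestReceived(RequestType&& input, std::shared_ptr<ServerStream> stream) final override
    {
        ResponseType output;
        output.set_batch_id(input.batch_id());    // mirror the request's batch_id
        stream->WriteResponse(std::move(output)); // returns false if disconnected
    }
};
```

The full set of operations available on the `ServerStream` object is spelled out in
the annotated context below: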
```c++
class SimpleContext final : public StreamingContext<simple::Input, simple::Output, SimpleResources>
{
    void RequestReceived(RequestType&& input, std::shared_ptr<ServerStream> stream) final override
    {
        // `input` incoming message of Input/RequestType
        // `stream` allows you to:
        //   ->StreamID()
        //     - unique identifier for this stream; note, the ID will be reused when the
        //       context is recycled.
        //   ->WriteResponse(Output&&)
        //     - writes a response on the stream; returns `true` if the stream is connected;
        //       otherwise, `false` if the stream is disconnected
        //   ->IsConnected()
        //     - bool - is the stream still connected to the client. `FinishStream` or
        //       `CancelStream` will disconnect all `ServerStream` objects that share the
        //       same StreamID.
        //   ->FinishStream()
        //     - Close the Stream from the server-side with status OK
        //   ->CancelStream()
        //     - Close the Stream with status CANCELLED
        //
        // NOTE: The gRPC stream will stay open to the client until:
        //   1) the client closes its half of the stream, and
        //   2) all `ServerStream` objects are destroyed OR
        //      the stream is explicitly closed by Cancel/FinishStream
    }
};
```

The final comment is worth further discussion. The life of the `ServerStream` object
does not have to be tied to the life of the `RequestReceived` call. You can pass the
`ServerStream` object off to an external resource, which can then write messages on
the stream as long as the stream remains connected. The stream will disconnect
implicitly.

- `ping-pong.cc` - in-order send/recv stream. The client sends a Request and the
  server responds with the same value for `batch_id`.


================================================
FILE: examples/nvRPC/StreamingService/client.cc
================================================
/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ #include #include #include #include #include #include #include #include "nvrpc/client/client_streaming.h" #include "nvrpc/client/executor.h" using grpc::Channel; using grpc::ClientContext; using grpc::Status; using nvrpc::client::ClientStreaming; using nvrpc::client::Executor; #include "echo.grpc.pb.h" using simple::Inference; using simple::Input; using simple::Output; static bool ValidateEven(const char* flagname, int value) { LOG_IF(ERROR, value % 2) << "Examples require an even number of messages"; return (value % 2 == 0); } DEFINE_int32(count, 100, "number of grpc messages to send"); DEFINE_validator(count, &ValidateEven); DEFINE_int32(thread_count, 1, "Size of thread pool"); DEFINE_string(hostname, "127.0.0.1:50051", "hostname and port"); int main(int argc, char** argv) { // Instantiate the client. It requires a channel, out of which the actual RPCs // are created. This channel models a connection to an endpoint (in this case, // localhost at port 50051). We indicate that the channel isn't authenticated // (use of InsecureChannelCredentials()). FLAGS_alsologtostderr = 1; // It will dump to console ::google::ParseCommandLineFlags(&argc, &argv, true); std::mutex mutex; std::size_t count = 0; auto executor = std::make_shared(FLAGS_thread_count); auto channel = grpc::CreateChannel(FLAGS_hostname, grpc::InsecureChannelCredentials()); auto stub = Inference::NewStub(channel); auto infer_prepare_fn = [&stub](::grpc::ClientContext * context, ::grpc::CompletionQueue * cq) -> auto { return std::move(stub->PrepareAsyncBidirectional(context, cq)); }; auto stream = std::make_unique>( infer_prepare_fn, executor, [](Input&& request) { LOG_FIRST_N(INFO, 10) << "Sent Request with BatchID: " << request.batch_id(); }, [&mutex, &count](Output&& response) { static size_t last = 0; LOG_FIRST_N(INFO, 10) << "Received Response with BatchID: " << response.batch_id(); CHECK_EQ(++last, response.batch_id()); std::lock_guard lock(mutex); --count; }); auto start = std::chrono::steady_clock::now(); auto elapsed = [start]() -> float { return std::chrono::duration(std::chrono::steady_clock::now() - start).count(); }; for(int i = 1; i < FLAGS_count + 1; i++) { { std::lock_guard lock(mutex); ++count; } Input input; input.set_batch_id(i); stream->Write(std::move(input)); } std::cout << FLAGS_count << " queued in " << elapsed() << "seconds" << std::endl; auto future = stream->Done(); // auto future = stream->Status(); auto status = future.get(); std::cout << FLAGS_count << " completed in " << elapsed() << "seconds" << std::endl; std::cout << "gRPC Status: " << (status.ok() ? "OK" : "NOT OK") << std::endl; executor->ShutdownAndJoin(); CHECK_EQ(count, 0UL); return 0; } ================================================ FILE: examples/nvRPC/StreamingService/common.h ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #pragma once #include #include #include #include #include "tensorrt/laboratory/core/pool.h" #include "tensorrt/laboratory/core/resources.h" #include "tensorrt/laboratory/core/thread_pool.h" using trtlab::Resources; using trtlab::ThreadPool; #include "nvrpc/executor.h" #include "nvrpc/server.h" #include "nvrpc/service.h" using nvrpc::AsyncRPC; using nvrpc::AsyncService; using nvrpc::Executor; using nvrpc::Server; using nvrpc::StreamingContext; #include "echo.grpc.pb.h" #include "echo.pb.h" ================================================ FILE: examples/nvRPC/StreamingService/even-odds.cc ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */
#include "common.h"

// clang-format off
struct SimpleResources : public Resources
{
    SimpleResources(int numThreadsInPool = 3) : m_ThreadPool(numThreadsInPool) {}
    ThreadPool& AcquireThreadPool() { return m_ThreadPool; }
  private:
    ThreadPool m_ThreadPool;
};
// clang-format on

class SimpleContext final : public StreamingContext<simple::Input, simple::Output, SimpleResources>
{
    void RequestReceived(RequestType&& input, std::shared_ptr<ServerStream> stream) final override
    {
        LOG_FIRST_N(INFO, 10) << "BatchID: " << input.batch_id() << " Tag = " << Tag()
                              << " Thread = " << std::this_thread::get_id();

        // If even, send back two responses.
        // If odd, do nothing.
        if(input.batch_id() % 2 == 0)
        {
            LOG_FIRST_N(INFO, 5) << "Received an Even BatchID: Sending back two responses";
            for(int i = input.batch_id() - 1; i <= input.batch_id(); i++)
            {
                ResponseType output;
                output.set_batch_id(i);
                stream->WriteResponse(std::move(output));
            }
        }
        else
        {
            LOG_FIRST_N(INFO, 5) << "Received an Odd BatchID: No Response will be sent";
        }
    }
};

// CLI Options
DEFINE_int32(thread_count, 1, "Size of thread pool");
DEFINE_string(ip_port, "0.0.0.0:50051", "IP/Port");

int main(int argc, char* argv[])
{
    FLAGS_alsologtostderr = 1; // Log to console
    ::google::InitGoogleLogging("simpleServer");
    ::google::ParseCommandLineFlags(&argc, &argv, true);

    Server server(FLAGS_ip_port);
    auto simpleInference = server.RegisterAsyncService<simple::Inference::AsyncService>();
    auto rpcCompute = simpleInference->RegisterRPC<SimpleContext>(
        &simple::Inference::AsyncService::RequestBidirectional);
    auto rpcResources = std::make_shared<SimpleResources>(FLAGS_thread_count);
    auto executor = server.RegisterExecutor(new Executor(1));
    executor->RegisterContexts(rpcCompute, rpcResources, 10);
    server.Run(std::chrono::milliseconds(2000), [] {});
}
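For clarity, here is the request/response pattern this server produces, and why the
streaming client insists on an even `--count` (its `ValidateEven` flag validator
appears in `client.cc` above). The trace below is an illustration, not part of the
build.

```c++
// Expected wire pattern against even-odds:
//   send batch_id=1 -> no response
//   send batch_id=2 -> responses with batch_id 1, then 2
//   send batch_id=3 -> no response
//   send batch_id=4 -> responses with batch_id 3, then 4
// Response count equals request count only when the client sends an even number
// of requests, which is why the client validates --count with ValidateEven.
```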
================================================
FILE: examples/nvRPC/StreamingService/ping-pong.cc
================================================
/* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "common.h"

// clang-format off
struct SimpleResources : public Resources
{
    SimpleResources(int numThreadsInPool = 3) : m_ThreadPool(numThreadsInPool) {}
    ThreadPool& AcquireThreadPool() { return m_ThreadPool; }
  private:
    ThreadPool m_ThreadPool;
};
// clang-format on

class SimpleContext final : public StreamingContext<simple::Input, simple::Output, SimpleResources>
{
    void RequestReceived(RequestType&& input, std::shared_ptr<ServerStream> stream) final override
    {
        LOG_FIRST_N(INFO, 10) << "BatchID: " << input.batch_id() << " Tag = " << Tag()
                              << " Thread = " << std::this_thread::get_id();
        ResponseType output;
        output.set_batch_id(input.batch_id());
        stream->WriteResponse(std::move(output));
        // TODO: add a test in which the server closes the stream before the client does
        // stream->FinishStream();
    }
};

// CLI Options
DEFINE_int32(thread_count, 1, "Size of thread pool");
DEFINE_string(ip_port, "0.0.0.0:50051", "IP/Port");

int main(int argc, char* argv[])
{
    FLAGS_alsologtostderr = 1; // Log to console
    ::google::InitGoogleLogging("simpleServer");
    ::google::ParseCommandLineFlags(&argc, &argv, true);

    Server server(FLAGS_ip_port);
    auto simpleInference = server.RegisterAsyncService<simple::Inference::AsyncService>();
    auto rpcCompute = simpleInference->RegisterRPC<SimpleContext>(
        &simple::Inference::AsyncService::RequestBidirectional);
    auto rpcResources = std::make_shared<SimpleResources>(FLAGS_thread_count);
    auto executor = server.RegisterExecutor(new Executor(1));
    executor->RegisterContexts(rpcCompute, rpcResources, 10);
    server.Run(std::chrono::milliseconds(2000), [] {});
}


================================================
FILE: examples/nvRPC/StreamingService/test.sh
================================================
#!/bin/bash

cleanup() {
    kill $(jobs -p) ||:
}
trap "cleanup" EXIT SIGINT SIGTERM

export PATH=".:$PATH"
exe=${1:-"./nvrpc-ping-pong-server.x"}

$exe --ip_port="0.0.0.0:5555" &

f=$(mktemp)
cat <<'EOF' > $f
PS1='nvRPC Bidirectional: '
go() {
    ./nvrpc-streaming-client.x --hostname="localhost:5555" --count=${1:-100}
}
EOF

ps aux
echo
echo 'Try ./nvrpc-streaming-client.x --hostname="localhost:5555" --count=100'
bash --rcfile <(echo "PS1='nvRPC StreamingService: '")


================================================
FILE: examples/nvRPC/UnaryService/CMakeLists.txt
================================================
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add_executable(nvrpc-unary-server.x server.cc) target_link_libraries(nvrpc-unary-server.x nvrpc echo-protos gflags ) add_executable(nvrpc-unary-client.x client.cc) target_link_libraries(nvrpc-unary-client.x nvrpc nvrpc-client echo-protos gflags ) ================================================ FILE: examples/nvRPC/UnaryService/client.cc ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include "nvrpc/client/client_unary.h" #include "nvrpc/client/executor.h" using grpc::Channel; using grpc::ClientContext; using grpc::Status; using nvrpc::client::ClientUnary; using nvrpc::client::Executor; #include "echo.grpc.pb.h" using simple::Inference; using simple::Input; using simple::Output; DEFINE_int32(count, 100, "number of grpc messages to send"); DEFINE_int32(thread_count, 1, "Size of thread pool"); int main(int argc, char** argv) { // Instantiate the client. It requires a channel, out of which the actual RPCs // are created. This channel models a connection to an endpoint (in this case, // localhost at port 50051). We indicate that the channel isn't authenticated // (use of InsecureChannelCredentials()). 
FLAGS_alsologtostderr = 1; // It will dump to console ::google::ParseCommandLineFlags(&argc, &argv, true); auto executor = std::make_shared(FLAGS_thread_count); auto channel = grpc::CreateChannel("localhost:50051", grpc::InsecureChannelCredentials()); auto stub = Inference::NewStub(channel); auto infer_prepare_fn = [&stub](::grpc::ClientContext * context, const ::simple::Input& request, ::grpc::CompletionQueue* cq) -> auto { return std::move(stub->PrepareAsyncCompute(context, request, cq)); }; auto runner = std::make_unique>(infer_prepare_fn, executor); auto start = std::chrono::steady_clock::now(); auto elapsed = [start]() -> float { return std::chrono::duration(std::chrono::steady_clock::now() - start).count(); }; for(int i = 0; i < FLAGS_count; i++) { Input input; input.set_batch_id(i); runner->Enqueue(std::move(input), [i](Input& input, Output& output, ::grpc::Status& status) -> bool { CHECK(output.batch_id() == i); LOG_FIRST_N(INFO, 20) << "Check: " << i; return (bool)(output.batch_id() == i); }); } std::cout << FLAGS_count << " queued in " << elapsed() << "seconds" << std::endl; executor->ShutdownAndJoin(); std::cout << FLAGS_count << " completed in " << elapsed() << "seconds" << std::endl; return 0; } ================================================ FILE: examples/nvRPC/UnaryService/server.cc ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include "tensorrt/laboratory/core/pool.h" #include "tensorrt/laboratory/core/resources.h" #include "tensorrt/laboratory/core/thread_pool.h" using trtlab::Resources; using trtlab::ThreadPool; #include "nvrpc/executor.h" #include "nvrpc/server.h" #include "nvrpc/service.h" using nvrpc::AsyncRPC; using nvrpc::AsyncService; using nvrpc::Context; using nvrpc::Executor; using nvrpc::Server; #include "echo.grpc.pb.h" #include "echo.pb.h" // CLI Options DEFINE_int32(thread_count, 1, "Size of thread pool"); /** * Embedding a copy of the Protobuf specification for the gRPC service. 
 *
 * Package Name: simple
 * Service Name: Inference
 * RPC Name: Compute
 *
 * Incoming Message: Input
 * Outgoing Message: Output
 **
syntax = "proto3";
package simple;

service Inference {
   rpc Compute (Input) returns (Output) {}
}

message Input {
   uint64 batch_id = 1;
}

message Output {
   uint64 batch_id = 1;
}
*/

// Define the resources your RPC will need to execute
// ==================================================
// In this case, all simple::Inference::Compute RPCs share a threadpool on which they
// queue up some work. This essentially means, after the message has been received and
// processed, the actual work for the RPC is pushed to a worker pool outside the scope of
// the transaction processing system (TPS). This is essentially async computing: we have
// decoupled the transaction from the workers executing the implementation. The TPS can
// continue to queue work, while the workers process the load.
struct SimpleResources : public Resources
{
    SimpleResources(int numThreadsInPool = 3) : m_ThreadPool(numThreadsInPool)
    {
        LOG(INFO) << "Server ThreadCount: " << numThreadsInPool;
    }

    ThreadPool& AcquireThreadPool() { return m_ThreadPool; }

  private:
    ThreadPool m_ThreadPool;
};

// Contexts hold the state and provide the definition of the work to be performed by the RPC.
// This is where you define what gets executed for a given RPC.
// Incoming Message = simple::Input (RequestType)
// Outgoing Message = simple::Output (ResponseType)
class SimpleContext final : public Context<simple::Input, simple::Output, SimpleResources>
{
    void ExecuteRPC(RequestType& input, ResponseType& output) final override
    {
        // We could do work here, but we'd block the TPS, i.e. the threads pulling messages
        // off the incoming receive queue. Very quick responses are best done here; however,
        // longer running workloads should be offloaded so the TPS can avoid being blocked.
        GetResources()->AcquireThreadPool().enqueue([this, &input, &output] {
            // Now running on a worker thread of the ThreadPool defined in SimpleResources.
            // Here we are just echoing back the incoming batch_id; however, in later
            // examples, we'll show how to run an async cuda pipeline.
            LOG_FIRST_N(INFO, 20) << "Tag = " << Tag()
                                  << " Thread = " << std::this_thread::get_id();
            output.set_batch_id(input.batch_id());
            this->FinishResponse();
        });
        // The TPS thread is now free to continue processing messages - async ftw!
    }
};

int main(int argc, char* argv[])
{
    FLAGS_alsologtostderr = 1; // Log to console
    ::google::InitGoogleLogging("simpleServer");
    ::google::ParseCommandLineFlags(&argc, &argv, true);

    // A server will bind an IP:PORT to listen on
    Server server("0.0.0.0:50051");

    // A server can host multiple services
    LOG(INFO) << "Register Service (simple::Inference) with Server";
    auto simpleInference = server.RegisterAsyncService<simple::Inference::AsyncService>();

    // An RPC has two components that need to be specified when registering with the service:
    //  1) Type of Execution Context (SimpleContext). The execution context defines the behavior
    //     of the RPC, i.e. it contains the control logic for the execution of the RPC.
    //  2) The Request function (RequestCompute) which was generated by gRPC when compiling the
    //     protobuf which defined the service.
    //     This function is responsible for queuing the RPC's execution context to the
    //     gRPC completion queue.
    LOG(INFO) << "Register RPC (simple::Inference::Compute) with Service (simple::Inference)";
    auto rpcCompute = simpleInference->RegisterRPC<SimpleContext>(
        &simple::Inference::AsyncService::RequestCompute);

    LOG(INFO) << "Initializing Resources for RPC (simple::Inference::Compute)";
    auto rpcResources = std::make_shared<SimpleResources>(FLAGS_thread_count);

    // Create Executors - Executors provide the message processing resources for the RPCs.
    // Multiple Executors can be registered with a Server. The executor is responsible
    // for pulling incoming messages off the receive queue and executing the associated
    // context. By default, an executor only uses a single thread. A typical use case is
    // an Executor executing a context which immediately pushes the work to a thread pool.
    // However, for very low-latency messaging, you might want to use a multi-threaded
    // Executor and a Blocking Context - meaning the Context performs the entire RPC function
    // on the Executor's thread.
    LOG(INFO) << "Creating Executor";
    auto executor = server.RegisterExecutor(new Executor(1));

    // You can register RPC execution contexts from any registered RPC on any executor.
    // The power of that will become clear in later examples. For now, we will register
    // 10 instances of the simple::Inference::Compute RPC's SimpleContext execution context
    // with the Executor.
    LOG(INFO) << "Creating Execution Contexts for RPC (simple::Inference::Compute) with Executor";
    executor->RegisterContexts(rpcCompute, rpcResources, 10);

    LOG(INFO) << "Running Server";
    server.Run(std::chrono::milliseconds(2000), [] {
        // This is a timeout loop executed every 2 seconds.
        // Run() with no arguments will run an empty timeout loop every 5 seconds.
        // RunAsync() will return immediately; it's your responsibility to ensure the
        // server doesn't go out of scope, or a Shutdown will be triggered on your services.
    });
}


================================================
FILE: jupyter_notebook_config.py
================================================
# Configuration file for jupyter-notebook.

#------------------------------------------------------------------------------
# Application(SingletonConfigurable) configuration
#------------------------------------------------------------------------------

## This is an application.

## The date format used by logging formatters for %(asctime)s
#c.Application.log_datefmt = '%Y-%m-%d %H:%M:%S'

## The Logging format template
#c.Application.log_format = '[%(name)s]%(highlevel)s %(message)s'

## Set the log level by value or name.
#c.Application.log_level = 30

#------------------------------------------------------------------------------
# JupyterApp(Application) configuration
#------------------------------------------------------------------------------

## Base class for Jupyter applications

## Answer yes to any prompts.
#c.JupyterApp.answer_yes = False

## Full path of a config file.
#c.JupyterApp.config_file = ''

## Specify a config file to load.
#c.JupyterApp.config_file_name = ''

## Generate default config file.
#c.JupyterApp.generate_config = False

#------------------------------------------------------------------------------
# NotebookApp(JupyterApp) configuration
#------------------------------------------------------------------------------

## Set the Access-Control-Allow-Credentials: true header
#c.NotebookApp.allow_credentials = False

## Set the Access-Control-Allow-Origin header
#
#  Use '*' to allow any origin to access your server.
#
#  Takes precedence over allow_origin_pat.
#c.NotebookApp.allow_origin = ''

## Use a regular expression for the Access-Control-Allow-Origin header
#
#  Requests from an origin matching the expression will get replies with:
#
#      Access-Control-Allow-Origin: origin
#
#  where `origin` is the origin of the request.
#
#  Ignored if allow_origin is set.
#c.NotebookApp.allow_origin_pat = ''

## Allow password to be changed at login for the notebook server.
#
#  While logging in with a token, the notebook server UI will give the
#  opportunity to the user to enter a new password at the same time that will
#  replace the token login mechanism.
#
#  This can be set to false to prevent changing password from the UI/API.
#c.NotebookApp.allow_password_change = True

## Allow requests where the Host header doesn't point to a local server
#
#  By default, requests get a 403 forbidden response if the 'Host' header
#  shows that the browser thinks it's on a non-local domain. Setting this
#  option to True disables this check.
#
#  This protects against 'DNS rebinding' attacks, where a remote web server
#  serves you a page and then changes its DNS to send later requests to a
#  local IP, bypassing same-origin checks.
#
#  Local IP addresses (such as 127.0.0.1 and ::1) are allowed as local, along
#  with hostnames configured in local_hostnames.
c.NotebookApp.allow_remote_access = True

## Whether to allow the user to run the notebook as root.
c.NotebookApp.allow_root = True

## DEPRECATED use base_url
#c.NotebookApp.base_project_url = '/'

## The base URL for the notebook server.
#
#  Leading and trailing slashes can be omitted, and will automatically be
#  added.
#c.NotebookApp.base_url = '/'

## Specify what command to use to invoke a web browser when opening the
#  notebook. If not specified, the default browser will be determined by the
#  `webbrowser` standard library module, which allows setting of the BROWSER
#  environment variable to override it.
#c.NotebookApp.browser = ''

## The full path to an SSL/TLS certificate file.
#c.NotebookApp.certfile = ''

## The full path to a certificate authority certificate for SSL/TLS client
#  authentication.
#c.NotebookApp.client_ca = ''

## The config manager class to use
#c.NotebookApp.config_manager_class = 'notebook.services.config.manager.ConfigManager'

## The notebook manager class to use.
#c.NotebookApp.contents_manager_class = 'notebook.services.contents.largefilemanager.LargeFileManager'

## Extra keyword arguments to pass to `set_secure_cookie`. See tornado's
#  set_secure_cookie docs for details.
#c.NotebookApp.cookie_options = {}

## The random bytes used to secure cookies. By default this is a new random
#  number every time you start the Notebook. Set it to a value in a config
#  file to enable logins to persist across server sessions.
#
#  Note: Cookie secrets should be kept private, do not share config files with
#  cookie_secret stored in plaintext (you can read the value from a file).
#c.NotebookApp.cookie_secret = b''

## The file where the cookie secret is stored.
#c.NotebookApp.cookie_secret_file = ''

## Override URL shown to users.
#
#  Replace actual URL, including protocol, address, port and base URL, with
#  the given value when displaying URL to the users. Do not change the actual
#  connection URL. If authentication token is enabled, the token is added to
#  the custom URL automatically.
#
#  This option is intended to be used when the URL to display to the user
#  cannot be determined reliably by the Jupyter notebook server (proxified or
#  containerized setups for example).
#c.NotebookApp.custom_display_url = ''

## The default URL to redirect to from `/`
#c.NotebookApp.default_url = '/tree'

## Disable cross-site-request-forgery protection
#
#  Jupyter notebook 4.3.1 introduces protection from cross-site request
#  forgeries, requiring API requests to either:
#
#  - originate from pages served by this server (validated with XSRF cookie
#    and token), or
#  - authenticate with a token
#
#  Some anonymous compute resources still desire the ability to run code,
#  completely without authentication. These services can disable all
#  authentication and security checks, with the full knowledge of what that
#  implies.
#c.NotebookApp.disable_check_xsrf = False

## Whether to enable MathJax for typesetting math/TeX
#
#  MathJax is the javascript library Jupyter uses to render math/LaTeX. It is
#  very large, so you may want to disable it if you have a slow internet
#  connection, or for offline use of the notebook.
#
#  When disabled, equations etc. will appear as their untransformed TeX
#  source.
#c.NotebookApp.enable_mathjax = True

## extra paths to look for Javascript notebook extensions
#c.NotebookApp.extra_nbextensions_path = []

## handlers that should be loaded at higher priority than the default services
#c.NotebookApp.extra_services = []

## Extra paths to search for serving static files.
#
#  This allows adding javascript/css to be available from the notebook server
#  machine, or overriding individual files in the IPython
#c.NotebookApp.extra_static_paths = []

## Extra paths to search for serving jinja templates.
#
#  Can be used to override templates from notebook.templates.
#c.NotebookApp.extra_template_paths = []

##
#c.NotebookApp.file_to_run = ''

## Extra keyword arguments to pass to `get_secure_cookie`. See tornado's
#  get_secure_cookie docs for details.
#c.NotebookApp.get_secure_cookie_kwargs = {}

## Deprecated: Use minified JS file or not, mainly use during dev to avoid JS
#  recompilation
#c.NotebookApp.ignore_minified_js = False

## (bytes/sec) Maximum rate at which stream output can be sent on iopub before
#  they are limited.
#c.NotebookApp.iopub_data_rate_limit = 1000000

## (msgs/sec) Maximum rate at which messages can be sent on iopub before they
#  are limited.
#c.NotebookApp.iopub_msg_rate_limit = 1000

## The IP address the notebook server will listen on.
c.NotebookApp.ip = '0.0.0.0'

## Supply extra arguments that will be passed to Jinja environment.
#c.NotebookApp.jinja_environment_options = {}

## Extra variables to supply to jinja templates when rendering.
#c.NotebookApp.jinja_template_vars = {}

## The kernel manager class to use.
#c.NotebookApp.kernel_manager_class = 'notebook.services.kernels.kernelmanager.MappingKernelManager'

## The kernel spec manager class to use. Should be a subclass of
#  `jupyter_client.kernelspec.KernelSpecManager`.
#
#  The Api of KernelSpecManager is provisional and might change without
#  warning between this version of Jupyter and the next stable one.
#c.NotebookApp.kernel_spec_manager_class = 'jupyter_client.kernelspec.KernelSpecManager'

## The full path to a private key file for usage with SSL/TLS.
#c.NotebookApp.keyfile = ''

## Hostnames to allow as local when allow_remote_access is False.
#
#  Local IP addresses (such as 127.0.0.1 and ::1) are automatically accepted
#  as local as well.
#c.NotebookApp.local_hostnames = ['localhost']

## The login handler class to use.
#c.NotebookApp.login_handler_class = 'notebook.auth.login.LoginHandler'

## The logout handler class to use.
#c.NotebookApp.logout_handler_class = 'notebook.auth.logout.LogoutHandler'

## The MathJax.js configuration file that is to be used.
#c.NotebookApp.mathjax_config = 'TeX-AMS-MML_HTMLorMML-full,Safe'

## A custom url for MathJax.js. Should be in the form of a case-sensitive url
#  to MathJax, for example: /static/components/MathJax/MathJax.js
#c.NotebookApp.mathjax_url = ''

## Sets the maximum allowed size of the client request body, specified in the
#  Content-Length request header field. If the size in a request exceeds the
#  configured value, a malformed HTTP message is returned to the client.
#
#  Note: max_body_size is applied even in streaming mode.
#c.NotebookApp.max_body_size = 536870912

## Gets or sets the maximum amount of memory, in bytes, that is allocated for
#  use by the buffer manager.
#c.NotebookApp.max_buffer_size = 536870912

## Dict of Python modules to load as notebook server extensions. Entry values
#  can be used to enable and disable the loading of the extensions. The
#  extensions will be loaded in alphabetical order.
#c.NotebookApp.nbserver_extensions = {}

## The directory to use for notebooks and kernels.
c.NotebookApp.notebook_dir = '/work'

## Whether to open in a browser after starting. The specific browser used is
#  platform dependent and determined by the python standard library
#  `webbrowser` module, unless it is overridden using the --browser
#  (NotebookApp.browser) configuration option.
#c.NotebookApp.open_browser = True

## Hashed password to use for web authentication.
#
#  To generate, type in a python/IPython shell:
#
#    from notebook.auth import passwd; passwd()
#
#  The string should be of the form type:salt:hashed-password.
#c.NotebookApp.password = ''

## Forces users to use a password for the Notebook server. This is useful in
#  a multi user environment, for instance when everybody in the LAN can
#  access each other's machine through ssh.
#
#  In such a case, serving the notebook server on localhost is not secure
#  since any user can connect to the notebook server via ssh.
#c.NotebookApp.password_required = False

## The port the notebook server will listen on.
#c.NotebookApp.port = 8888

## The number of additional ports to try if the specified port is not
#  available.
#c.NotebookApp.port_retries = 50

## DISABLED: use %pylab or %matplotlib in the notebook to enable matplotlib.
#c.NotebookApp.pylab = 'disabled'

## If True, display a button in the dashboard to quit (shutdown the notebook
#  server).
#c.NotebookApp.quit_button = True

## (sec) Time window used to check the message and data rate limits.
#c.NotebookApp.rate_limit_window = 3

## Reraise exceptions encountered loading server extensions?
#c.NotebookApp.reraise_server_extension_failures = False

## DEPRECATED use the nbserver_extensions dict instead
#c.NotebookApp.server_extensions = []

## The session manager class to use.
#c.NotebookApp.session_manager_class = 'notebook.services.sessions.sessionmanager.SessionManager'

## Shut down the server after N seconds with no kernels or terminals running
#  and no activity. This can be used together with culling idle kernels
#  (MappingKernelManager.cull_idle_timeout) to shutdown the notebook server
#  when it's not in use. This is not precisely timed: it may shut down up to
#  a minute later. 0 (the default) disables this automatic shutdown.
#c.NotebookApp.shutdown_no_activity_timeout = 0

## Supply SSL options for the tornado HTTPServer. See the tornado docs for
#  details.
#c.NotebookApp.ssl_options = {}

## Supply overrides for terminado.
#  Currently only supports "shell_command".
#c.NotebookApp.terminado_settings = {}

## Set to False to disable terminals.
#
#  This does *not* make the notebook server more secure by itself. Anything
#  the user can do in a terminal, they can also do in a notebook.
#
#  Terminals may also be automatically disabled if the terminado package is
#  not available.
#c.NotebookApp.terminals_enabled = True

## Token used for authenticating first-time connections to the server.
#
#  When no password is enabled, the default is to generate a new, random
#  token.
#
#  Setting to an empty string disables authentication altogether, which is
#  NOT RECOMMENDED.
#c.NotebookApp.token = ''

## Supply overrides for the tornado.web.Application that the Jupyter notebook
#  uses.
#c.NotebookApp.tornado_settings = {}

## Whether to trust or not X-Scheme/X-Forwarded-Proto and
#  X-Real-Ip/X-Forwarded-For headers sent by the upstream reverse proxy.
#  Necessary if the proxy handles SSL
#c.NotebookApp.trust_xheaders = False

## DEPRECATED, use tornado_settings
#c.NotebookApp.webapp_settings = {}

## Specify where to open the notebook on startup. This is the `new` argument
#  passed to the standard library method `webbrowser.open`. The behaviour is
#  not guaranteed, but depends on browser support. Valid values are:
#
#  - 2 opens a new tab,
#  - 1 opens a new window,
#  - 0 opens in an existing window.
#
#  See the `webbrowser.open` documentation for details.
#c.NotebookApp.webbrowser_open_new = 2

## Set the tornado compression options for websocket connections.
#
#  This value will be returned from
#  :meth:`WebSocketHandler.get_compression_options`. None (default) will
#  disable compression. A dict (even an empty one) will enable compression.
#
#  See the tornado docs for WebSocketHandler.get_compression_options for
#  details.
#c.NotebookApp.websocket_compression_options = None

## The base URL for websockets, if it differs from the HTTP server (hint: it
#  almost certainly doesn't).
#
#  Should be in the form of an HTTP origin: ws[s]://hostname[:port]
#c.NotebookApp.websocket_url = ''

#------------------------------------------------------------------------------
# ConnectionFileMixin(LoggingConfigurable) configuration
#------------------------------------------------------------------------------

## Mixin for configurable classes that work with connection files

## JSON file in which to store connection info [default: kernel-<id>.json]
#
#  This file will contain the IP, ports, and authentication key needed to
#  connect clients to this kernel. By default, this file will be created in
#  the security dir of the current profile, but can be specified by absolute
#  path.
#c.ConnectionFileMixin.connection_file = ''

## set the control (ROUTER) port [default: random]
#c.ConnectionFileMixin.control_port = 0

## set the heartbeat port [default: random]
#c.ConnectionFileMixin.hb_port = 0

## set the iopub (PUB) port [default: random]
#c.ConnectionFileMixin.iopub_port = 0

## Set the kernel's IP address [default localhost]. If the IP address is
#  something other than localhost, then Consoles on other machines will be
#  able to connect to the Kernel, so be careful!
#c.ConnectionFileMixin.ip = ''

## set the shell (ROUTER) port [default: random]
#c.ConnectionFileMixin.shell_port = 0

## set the stdin (ROUTER) port [default: random]
#c.ConnectionFileMixin.stdin_port = 0

##
#c.ConnectionFileMixin.transport = 'tcp'

#------------------------------------------------------------------------------
# KernelManager(ConnectionFileMixin) configuration
#------------------------------------------------------------------------------

## Manages a single kernel in a subprocess on this host.
#
#  This version starts kernels with Popen.

## Should we autorestart the kernel if it dies.
#c.KernelManager.autorestart = True

## DEPRECATED: Use kernel_name instead.
#
#  The Popen Command to launch the kernel. Override this if you have a custom
#  kernel. If kernel_cmd is specified in a configuration file, Jupyter does
#  not pass any arguments to the kernel, because it cannot make any
#  assumptions about the arguments that the kernel understands. In
#  particular, this means that the kernel does not receive the option --debug
#  if it is given on the Jupyter command line.
#c.KernelManager.kernel_cmd = []

## Time to wait for a kernel to terminate before killing it, in seconds.
#c.KernelManager.shutdown_wait_time = 5.0

#------------------------------------------------------------------------------
# Session(Configurable) configuration
#------------------------------------------------------------------------------

## Object for handling serialization and sending of messages.
#
#  The Session object handles building messages and sending them with ZMQ
#  sockets or ZMQStream objects. Objects can communicate with each other over
#  the network via Session objects, and only need to work with the dict-based
#  IPython message spec. The Session will handle
#  serialization/deserialization, security, and metadata.
#
#  Sessions support configurable serialization via packer/unpacker traits,
#  and signing with HMAC digests via the key/keyfile traits.
#
#  Parameters
#  ----------
#
#  debug : bool
#      whether to trigger extra debugging statements
#  packer/unpacker : str : 'json', 'pickle' or import_string
#      importstrings for methods to serialize message parts. If just 'json'
#      or 'pickle', predefined JSON and pickle packers will be used.
#      Otherwise, the entire importstring must be used.
#
#      The functions must accept at least valid JSON input, and output
#      *bytes*.
#
#      For example, to use msgpack:
#      packer = 'msgpack.packb', unpacker='msgpack.unpackb'
#  pack/unpack : callables
#      You can also set the pack/unpack callables for serialization directly.
#  session : bytes
#      the ID of this Session object. The default is to generate a new UUID.
#  username : unicode
#      username added to message headers. The default is to ask the OS.
#  key : bytes
#      The key used to initialize an HMAC signature. If unset, messages will
#      not be signed or checked.
#  keyfile : filepath
#      The file containing a key. If this is set, `key` will be initialized
#      to the contents of the file.

## Threshold (in bytes) beyond which an object's buffer should be extracted
#  to avoid pickling.
#c.Session.buffer_threshold = 1024

## Whether to check PID to protect against calls after fork.
#
#  This check can be disabled if fork-safety is handled elsewhere.
#c.Session.check_pid = True

## Threshold (in bytes) beyond which a buffer should be sent without copying.
#c.Session.copy_threshold = 65536

## Debug output in the Session
#c.Session.debug = False

## The maximum number of digests to remember.
#
#  The digest history will be culled when it exceeds this value.
#c.Session.digest_history_size = 65536

## The maximum number of items for a container to be introspected for custom
#  serialization. Containers larger than this are pickled outright.
#c.Session.item_threshold = 64

## execution key, for signing messages.
#c.Session.key = b''

## path to file containing execution key.
#c.Session.keyfile = ''

## Metadata dictionary, which serves as the default top-level metadata dict
#  for each message.
#c.Session.metadata = {}

## The name of the packer for serializing messages. Should be one of 'json',
#  'pickle', or an import name for a custom callable serializer.
#c.Session.packer = 'json'

## The UUID identifying this session.
#c.Session.session = ''

## The digest scheme used to construct the message signatures. Must have the
#  form 'hmac-HASH'.
#c.Session.signature_scheme = 'hmac-sha256'

## The name of the unpacker for unserializing messages. Only used with custom
#  functions for `packer`.
#c.Session.unpacker = 'json'

## Username for the Session. Default is your system username.
#c.Session.username = 'username'

#------------------------------------------------------------------------------
# MultiKernelManager(LoggingConfigurable) configuration
#------------------------------------------------------------------------------

## A class for managing multiple kernels.

## The name of the default kernel to start
#c.MultiKernelManager.default_kernel_name = 'python3'

## The kernel manager class. This is configurable to allow subclassing of the
#  KernelManager for customized behavior.
#c.MultiKernelManager.kernel_manager_class = 'jupyter_client.ioloop.IOLoopKernelManager'

#------------------------------------------------------------------------------
# MappingKernelManager(MultiKernelManager) configuration
#------------------------------------------------------------------------------

## A KernelManager that handles notebook mapping and HTTP error handling

## Whether messages from kernels whose frontends have disconnected should be
#  buffered in-memory.
#
#  When True (default), messages are buffered and replayed on reconnect,
#  avoiding lost messages due to interrupted connectivity.
#
#  Disable if long-running kernels will produce too much output while no
#  frontends are connected.
#c.MappingKernelManager.buffer_offline_messages = True

## Whether to consider culling kernels which are busy. Only effective if
#  cull_idle_timeout > 0.
#c.MappingKernelManager.cull_busy = False

## Whether to consider culling kernels which have one or more connections.
#  Only effective if cull_idle_timeout > 0.
#c.MappingKernelManager.cull_connected = False

## Timeout (in seconds) after which a kernel is considered idle and ready to
#  be culled. Values of 0 or lower disable culling. Very short timeouts may
#  result in kernels being culled for users with poor network connections.
#c.MappingKernelManager.cull_idle_timeout = 0

## The interval (in seconds) on which to check for idle kernels exceeding the
#  cull timeout value.
#c.MappingKernelManager.cull_interval = 300

## Timeout for giving up on a kernel (in seconds).
#
#  On starting and restarting kernels, we check whether the kernel is running
#  and responsive by sending kernel_info_requests. This sets the timeout in
#  seconds for how long the kernel can take before being presumed dead. This
#  affects the MappingKernelManager (which handles kernel restarts) and the
#  ZMQChannelsHandler (which handles the startup).
#c.MappingKernelManager.kernel_info_timeout = 60

##
#c.MappingKernelManager.root_dir = ''

#------------------------------------------------------------------------------
# ContentsManager(LoggingConfigurable) configuration
#------------------------------------------------------------------------------

## Base class for serving files and directories.
#
#  This serves any text or binary file, as well as directories, with special
#  handling for JSON notebook documents.
#
#  Most APIs take a path argument, which is always an API-style unicode path,
#  and always refers to a directory.
#
#  - unicode, not url-escaped
#  - '/'-separated
#  - leading and trailing '/' will be stripped
#  - if unspecified, path defaults to '', indicating the root path.

## Allow access to hidden files
#c.ContentsManager.allow_hidden = False

##
#c.ContentsManager.checkpoints = None

##
#c.ContentsManager.checkpoints_class = 'notebook.services.contents.checkpoints.Checkpoints'

##
#c.ContentsManager.checkpoints_kwargs = {}

## handler class to use when serving raw file requests.
#
#  Default is a fallback that talks to the ContentsManager API, which may be
#  inefficient, especially for large files.
#
#  Local files-based ContentsManagers can use a StaticFileHandler subclass,
#  which will be much more efficient.
#
#  Access to these files should be Authenticated.
#c.ContentsManager.files_handler_class = 'notebook.files.handlers.FilesHandler'

## Extra parameters to pass to files_handler_class.
#
#  For example, StaticFileHandlers generally expect a `path` argument
#  specifying the root directory from which to serve files.
#c.ContentsManager.files_handler_params = {}

## Glob patterns to hide in file and directory listings.
#c.ContentsManager.hide_globs = ['__pycache__', '*.pyc', '*.pyo', '.DS_Store', '*.so', '*.dylib', '*~']

## Python callable or importstring thereof
#
#  To be called on a contents model prior to save.
#
#  This can be used to process the structure, such as removing notebook
#  outputs or other side effects that should not be saved.
#
#  It will be called as (all arguments passed by keyword)::
#
#      hook(path=path, model=model, contents_manager=self)
#
#  - model: the model to be saved. Includes file contents. Modifying this
#    dict will affect the file that is stored.
#  - path: the API path of the save destination
#  - contents_manager: this ContentsManager instance
#c.ContentsManager.pre_save_hook = None

##
#c.ContentsManager.root_dir = '/'

## The base name used when creating untitled directories.
#c.ContentsManager.untitled_directory = 'Untitled Folder'

## The base name used when creating untitled files.
#c.ContentsManager.untitled_file = 'untitled'

## The base name used when creating untitled notebooks.
#c.ContentsManager.untitled_notebook = 'Untitled'

#------------------------------------------------------------------------------
# FileManagerMixin(Configurable) configuration
#------------------------------------------------------------------------------

## Mixin for ContentsAPI classes that interact with the filesystem.
#
#  Provides facilities for reading, writing, and copying both notebooks and
#  generic files.
#
#  Shared by FileContentsManager and FileCheckpoints.
#
#  Note
#  ----
#  Classes using this mixin must provide the following attributes:
#
#  root_dir : unicode
#      A directory against which API-style paths are to be resolved.
#
#  log : logging.Logger

## By default notebooks are saved on disk in a temporary file and then, if
#  successfully written, it replaces the old one.
#  This procedure, namely 'atomic_writing', causes some bugs on file systems
#  without operation order enforcement (like some networked fs). If set to
#  False, the new notebook is written directly over the old one, which could
#  fail (e.g. full filesystem or quota).
#c.FileManagerMixin.use_atomic_writing = True

#------------------------------------------------------------------------------
# FileContentsManager(FileManagerMixin,ContentsManager) configuration
#------------------------------------------------------------------------------

## If True (default), deleting files will send them to the platform's
#  trash/recycle bin, where they can be recovered. If False, deleting files
#  really deletes them.
#c.FileContentsManager.delete_to_trash = True

## Python callable or importstring thereof
#
#  to be called on the path of a file just saved.
#
#  This can be used to process the file on disk, such as converting the
#  notebook to a script or HTML via nbconvert.
#
#  It will be called as (all arguments passed by keyword)::
#
#      hook(os_path=os_path, model=model, contents_manager=instance)
#
#  - path: the filesystem path to the file just written
#  - model: the model representing the file
#  - contents_manager: this ContentsManager instance
#c.FileContentsManager.post_save_hook = None

##
#c.FileContentsManager.root_dir = ''

## DEPRECATED, use post_save_hook. Will be removed in Notebook 5.0
#c.FileContentsManager.save_script = False

#------------------------------------------------------------------------------
# NotebookNotary(LoggingConfigurable) configuration
#------------------------------------------------------------------------------

## A class for computing and verifying notebook signatures.

## The hashing algorithm used to sign notebooks.
#c.NotebookNotary.algorithm = 'sha256'

## The sqlite file in which to store notebook signatures. By default, this
#  will be in your Jupyter data directory. You can set it to ':memory:' to
#  disable sqlite writing to the filesystem.
#c.NotebookNotary.db_file = ''

## The secret key with which notebooks are signed.
#c.NotebookNotary.secret = b''

## The file where the secret key is stored.
#c.NotebookNotary.secret_file = ''

## A callable returning the storage backend for notebook signatures. The
#  default uses an SQLite database.
#c.NotebookNotary.store_factory = traitlets.Undefined

#------------------------------------------------------------------------------
# KernelSpecManager(LoggingConfigurable) configuration
#------------------------------------------------------------------------------

## If there is no Python kernelspec registered and the IPython kernel is
#  available, ensure it is added to the spec list.
#c.KernelSpecManager.ensure_native_kernel = True

## The kernel spec class. This is configurable to allow subclassing of the
#  KernelSpecManager for customized behavior.
#c.KernelSpecManager.kernel_spec_class = 'jupyter_client.kernelspec.KernelSpec'

## Whitelist of allowed kernel names.
#
#  By default, all installed kernels are allowed.
#c.KernelSpecManager.whitelist = set()

================================================
FILE: models/README.md
================================================
## Sample Models

Included in this folder are a collection of open source models and some
scripts to build TensorRT engines from these models.

Currently, the samples provided only generate TensorRT engines with random
weights and are only good for synthetic tests. A sketch of the general build
flow follows.

TODO: update scripts to pull open sourced weights for fully functional models.
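For orientation, here is a hedged sketch of the general build flow: creating an
engine from one of these Caffe deploy files with TensorRT's legacy Caffe parser
(the C++ API vintage this repo targets). This is not one of the repo's scripts;
the file paths, the caffemodel name, and the "prob" output blob are
assumptions, and the scripts here use random weights instead of a caffemodel.

```cpp
// Sketch only: build a TensorRT engine from a Caffe deploy prototxt.
#include <cstddef>
#include <iostream>

#include "NvCaffeParser.h"
#include "NvInfer.h"

// Minimal logger required by the TensorRT builder.
class Logger : public nvinfer1::ILogger
{
    void log(Severity severity, const char* msg) override
    {
        if(severity <= Severity::kWARNING) std::cout << msg << std::endl;
    }
} gLogger;

int main()
{
    auto builder = nvinfer1::createInferBuilder(gLogger);
    auto network = builder->createNetwork();
    auto parser = nvcaffeparser1::createCaffeParser();

    // Parse the deploy file; weights come from a caffemodel (hypothetical path).
    auto blobs = parser->parse("ResNet-50-deploy.prototxt", "ResNet-50.caffemodel",
                               *network, nvinfer1::DataType::kFLOAT);
    network->markOutput(*blobs->find("prob")); // "prob" assumed as the softmax output

    builder->setMaxBatchSize(8);
    builder->setMaxWorkspaceSize(std::size_t(1) << 30); // 1 GiB scratch for tactics

    auto engine = builder->buildCudaEngine(*network);
    if(!engine)
    {
        std::cerr << "engine build failed" << std::endl;
        return 1;
    }

    // ... serialize with engine->serialize() or create an execution context ...
    engine->destroy();
    parser->destroy();
    network->destroy();
    builder->destroy();
    return 0;
}
```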
## Credits

Caffe ResNet-50 and ResNet-152 models from
[KaimingHe/deep-residual-networks](https://github.com/KaimingHe/deep-residual-networks)
are included without modification.

> The MIT License (MIT)
>
> Copyright (c) 2016 Shaoqing Ren
>
> Permission is hereby granted, free of charge, to any person obtaining a copy
> of this software and associated documentation files (the "Software"), to deal
> in the Software without restriction, including without limitation the rights
> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> copies of the Software, and to permit persons to whom the Software is
> furnished to do so, subject to the following conditions:
>
> The above copyright notice and this permission notice shall be included in all
> copies or substantial portions of the Software.
>
> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> SOFTWARE.

================================================
FILE: models/ResNet-152-deploy.prototxt
================================================
name: "ResNet-152" input: "data" input_dim: 1 input_dim: 3 input_dim: 224 input_dim: 224 layer { bottom: "data" top: "conv1" name: "conv1" type: "Convolution" convolution_param { num_output: 64 kernel_size: 7 pad: 3 stride: 2 bias_term: false } } layer { bottom: "conv1" top: "conv1" name: "bn_conv1" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "conv1" top: "conv1" name: "scale_conv1" type: "Scale" scale_param { bias_term: true } } layer { top: "conv1" bottom: "conv1" name: "conv1_relu" type: "ReLU" } layer { bottom: "conv1" top: "pool1" name: "pool1" type: "Pooling" pooling_param { kernel_size: 3 stride: 2 pool: MAX } } layer { bottom: "pool1" top: "res2a_branch1" name: "res2a_branch1" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res2a_branch1" top: "res2a_branch1" name: "bn2a_branch1" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res2a_branch1" top: "res2a_branch1" name: "scale2a_branch1" type: "Scale" scale_param { bias_term: true } } layer { bottom: "pool1" top: "res2a_branch2a" name: "res2a_branch2a" type: "Convolution" convolution_param { num_output: 64 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res2a_branch2a" top: "res2a_branch2a" name: "bn2a_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res2a_branch2a" top: "res2a_branch2a" name: "scale2a_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res2a_branch2a" bottom: "res2a_branch2a" name: "res2a_branch2a_relu" type: "ReLU" } layer { bottom: "res2a_branch2a" top: "res2a_branch2b" name: "res2a_branch2b" type: "Convolution" convolution_param { num_output: 64 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res2a_branch2b" top: "res2a_branch2b" name: "bn2a_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res2a_branch2b" top: "res2a_branch2b" name: "scale2a_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top:
"res2a_branch2b" bottom: "res2a_branch2b" name: "res2a_branch2b_relu" type: "ReLU" } layer { bottom: "res2a_branch2b" top: "res2a_branch2c" name: "res2a_branch2c" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res2a_branch2c" top: "res2a_branch2c" name: "bn2a_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res2a_branch2c" top: "res2a_branch2c" name: "scale2a_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res2a_branch1" bottom: "res2a_branch2c" top: "res2a" name: "res2a" type: "Eltwise" } layer { bottom: "res2a" top: "res2a" name: "res2a_relu" type: "ReLU" } layer { bottom: "res2a" top: "res2b_branch2a" name: "res2b_branch2a" type: "Convolution" convolution_param { num_output: 64 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res2b_branch2a" top: "res2b_branch2a" name: "bn2b_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res2b_branch2a" top: "res2b_branch2a" name: "scale2b_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res2b_branch2a" bottom: "res2b_branch2a" name: "res2b_branch2a_relu" type: "ReLU" } layer { bottom: "res2b_branch2a" top: "res2b_branch2b" name: "res2b_branch2b" type: "Convolution" convolution_param { num_output: 64 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res2b_branch2b" top: "res2b_branch2b" name: "bn2b_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res2b_branch2b" top: "res2b_branch2b" name: "scale2b_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res2b_branch2b" bottom: "res2b_branch2b" name: "res2b_branch2b_relu" type: "ReLU" } layer { bottom: "res2b_branch2b" top: "res2b_branch2c" name: "res2b_branch2c" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res2b_branch2c" top: "res2b_branch2c" name: "bn2b_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res2b_branch2c" top: "res2b_branch2c" name: "scale2b_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res2a" bottom: "res2b_branch2c" top: "res2b" name: "res2b" type: "Eltwise" } layer { bottom: "res2b" top: "res2b" name: "res2b_relu" type: "ReLU" } layer { bottom: "res2b" top: "res2c_branch2a" name: "res2c_branch2a" type: "Convolution" convolution_param { num_output: 64 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res2c_branch2a" top: "res2c_branch2a" name: "bn2c_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res2c_branch2a" top: "res2c_branch2a" name: "scale2c_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res2c_branch2a" bottom: "res2c_branch2a" name: "res2c_branch2a_relu" type: "ReLU" } layer { bottom: "res2c_branch2a" top: "res2c_branch2b" name: "res2c_branch2b" type: "Convolution" convolution_param { num_output: 64 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res2c_branch2b" top: "res2c_branch2b" name: "bn2c_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res2c_branch2b" top: "res2c_branch2b" name: "scale2c_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res2c_branch2b" bottom: "res2c_branch2b" name: "res2c_branch2b_relu" type: "ReLU" } layer { bottom: 
"res2c_branch2b" top: "res2c_branch2c" name: "res2c_branch2c" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res2c_branch2c" top: "res2c_branch2c" name: "bn2c_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res2c_branch2c" top: "res2c_branch2c" name: "scale2c_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res2b" bottom: "res2c_branch2c" top: "res2c" name: "res2c" type: "Eltwise" } layer { bottom: "res2c" top: "res2c" name: "res2c_relu" type: "ReLU" } layer { bottom: "res2c" top: "res3a_branch1" name: "res3a_branch1" type: "Convolution" convolution_param { num_output: 512 kernel_size: 1 pad: 0 stride: 2 bias_term: false } } layer { bottom: "res3a_branch1" top: "res3a_branch1" name: "bn3a_branch1" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3a_branch1" top: "res3a_branch1" name: "scale3a_branch1" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res2c" top: "res3a_branch2a" name: "res3a_branch2a" type: "Convolution" convolution_param { num_output: 128 kernel_size: 1 pad: 0 stride: 2 bias_term: false } } layer { bottom: "res3a_branch2a" top: "res3a_branch2a" name: "bn3a_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3a_branch2a" top: "res3a_branch2a" name: "scale3a_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res3a_branch2a" bottom: "res3a_branch2a" name: "res3a_branch2a_relu" type: "ReLU" } layer { bottom: "res3a_branch2a" top: "res3a_branch2b" name: "res3a_branch2b" type: "Convolution" convolution_param { num_output: 128 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res3a_branch2b" top: "res3a_branch2b" name: "bn3a_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3a_branch2b" top: "res3a_branch2b" name: "scale3a_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res3a_branch2b" bottom: "res3a_branch2b" name: "res3a_branch2b_relu" type: "ReLU" } layer { bottom: "res3a_branch2b" top: "res3a_branch2c" name: "res3a_branch2c" type: "Convolution" convolution_param { num_output: 512 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res3a_branch2c" top: "res3a_branch2c" name: "bn3a_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3a_branch2c" top: "res3a_branch2c" name: "scale3a_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res3a_branch1" bottom: "res3a_branch2c" top: "res3a" name: "res3a" type: "Eltwise" } layer { bottom: "res3a" top: "res3a" name: "res3a_relu" type: "ReLU" } layer { bottom: "res3a" top: "res3b1_branch2a" name: "res3b1_branch2a" type: "Convolution" convolution_param { num_output: 128 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res3b1_branch2a" top: "res3b1_branch2a" name: "bn3b1_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3b1_branch2a" top: "res3b1_branch2a" name: "scale3b1_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res3b1_branch2a" bottom: "res3b1_branch2a" name: "res3b1_branch2a_relu" type: "ReLU" } layer { bottom: "res3b1_branch2a" top: "res3b1_branch2b" name: "res3b1_branch2b" type: "Convolution" convolution_param { num_output: 128 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res3b1_branch2b" 
top: "res3b1_branch2b" name: "bn3b1_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3b1_branch2b" top: "res3b1_branch2b" name: "scale3b1_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res3b1_branch2b" bottom: "res3b1_branch2b" name: "res3b1_branch2b_relu" type: "ReLU" } layer { bottom: "res3b1_branch2b" top: "res3b1_branch2c" name: "res3b1_branch2c" type: "Convolution" convolution_param { num_output: 512 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res3b1_branch2c" top: "res3b1_branch2c" name: "bn3b1_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3b1_branch2c" top: "res3b1_branch2c" name: "scale3b1_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res3a" bottom: "res3b1_branch2c" top: "res3b1" name: "res3b1" type: "Eltwise" } layer { bottom: "res3b1" top: "res3b1" name: "res3b1_relu" type: "ReLU" } layer { bottom: "res3b1" top: "res3b2_branch2a" name: "res3b2_branch2a" type: "Convolution" convolution_param { num_output: 128 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res3b2_branch2a" top: "res3b2_branch2a" name: "bn3b2_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3b2_branch2a" top: "res3b2_branch2a" name: "scale3b2_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res3b2_branch2a" bottom: "res3b2_branch2a" name: "res3b2_branch2a_relu" type: "ReLU" } layer { bottom: "res3b2_branch2a" top: "res3b2_branch2b" name: "res3b2_branch2b" type: "Convolution" convolution_param { num_output: 128 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res3b2_branch2b" top: "res3b2_branch2b" name: "bn3b2_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3b2_branch2b" top: "res3b2_branch2b" name: "scale3b2_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res3b2_branch2b" bottom: "res3b2_branch2b" name: "res3b2_branch2b_relu" type: "ReLU" } layer { bottom: "res3b2_branch2b" top: "res3b2_branch2c" name: "res3b2_branch2c" type: "Convolution" convolution_param { num_output: 512 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res3b2_branch2c" top: "res3b2_branch2c" name: "bn3b2_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3b2_branch2c" top: "res3b2_branch2c" name: "scale3b2_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res3b1" bottom: "res3b2_branch2c" top: "res3b2" name: "res3b2" type: "Eltwise" } layer { bottom: "res3b2" top: "res3b2" name: "res3b2_relu" type: "ReLU" } layer { bottom: "res3b2" top: "res3b3_branch2a" name: "res3b3_branch2a" type: "Convolution" convolution_param { num_output: 128 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res3b3_branch2a" top: "res3b3_branch2a" name: "bn3b3_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3b3_branch2a" top: "res3b3_branch2a" name: "scale3b3_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res3b3_branch2a" bottom: "res3b3_branch2a" name: "res3b3_branch2a_relu" type: "ReLU" } layer { bottom: "res3b3_branch2a" top: "res3b3_branch2b" name: "res3b3_branch2b" type: "Convolution" convolution_param { num_output: 128 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res3b3_branch2b" top: "res3b3_branch2b" name: 
"bn3b3_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3b3_branch2b" top: "res3b3_branch2b" name: "scale3b3_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res3b3_branch2b" bottom: "res3b3_branch2b" name: "res3b3_branch2b_relu" type: "ReLU" } layer { bottom: "res3b3_branch2b" top: "res3b3_branch2c" name: "res3b3_branch2c" type: "Convolution" convolution_param { num_output: 512 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res3b3_branch2c" top: "res3b3_branch2c" name: "bn3b3_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3b3_branch2c" top: "res3b3_branch2c" name: "scale3b3_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res3b2" bottom: "res3b3_branch2c" top: "res3b3" name: "res3b3" type: "Eltwise" } layer { bottom: "res3b3" top: "res3b3" name: "res3b3_relu" type: "ReLU" } layer { bottom: "res3b3" top: "res3b4_branch2a" name: "res3b4_branch2a" type: "Convolution" convolution_param { num_output: 128 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res3b4_branch2a" top: "res3b4_branch2a" name: "bn3b4_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3b4_branch2a" top: "res3b4_branch2a" name: "scale3b4_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res3b4_branch2a" bottom: "res3b4_branch2a" name: "res3b4_branch2a_relu" type: "ReLU" } layer { bottom: "res3b4_branch2a" top: "res3b4_branch2b" name: "res3b4_branch2b" type: "Convolution" convolution_param { num_output: 128 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res3b4_branch2b" top: "res3b4_branch2b" name: "bn3b4_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3b4_branch2b" top: "res3b4_branch2b" name: "scale3b4_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res3b4_branch2b" bottom: "res3b4_branch2b" name: "res3b4_branch2b_relu" type: "ReLU" } layer { bottom: "res3b4_branch2b" top: "res3b4_branch2c" name: "res3b4_branch2c" type: "Convolution" convolution_param { num_output: 512 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res3b4_branch2c" top: "res3b4_branch2c" name: "bn3b4_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3b4_branch2c" top: "res3b4_branch2c" name: "scale3b4_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res3b3" bottom: "res3b4_branch2c" top: "res3b4" name: "res3b4" type: "Eltwise" } layer { bottom: "res3b4" top: "res3b4" name: "res3b4_relu" type: "ReLU" } layer { bottom: "res3b4" top: "res3b5_branch2a" name: "res3b5_branch2a" type: "Convolution" convolution_param { num_output: 128 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res3b5_branch2a" top: "res3b5_branch2a" name: "bn3b5_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3b5_branch2a" top: "res3b5_branch2a" name: "scale3b5_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res3b5_branch2a" bottom: "res3b5_branch2a" name: "res3b5_branch2a_relu" type: "ReLU" } layer { bottom: "res3b5_branch2a" top: "res3b5_branch2b" name: "res3b5_branch2b" type: "Convolution" convolution_param { num_output: 128 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res3b5_branch2b" top: "res3b5_branch2b" name: "bn3b5_branch2b" type: "BatchNorm" 
batch_norm_param { use_global_stats: true } } layer { bottom: "res3b5_branch2b" top: "res3b5_branch2b" name: "scale3b5_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res3b5_branch2b" bottom: "res3b5_branch2b" name: "res3b5_branch2b_relu" type: "ReLU" } layer { bottom: "res3b5_branch2b" top: "res3b5_branch2c" name: "res3b5_branch2c" type: "Convolution" convolution_param { num_output: 512 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res3b5_branch2c" top: "res3b5_branch2c" name: "bn3b5_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3b5_branch2c" top: "res3b5_branch2c" name: "scale3b5_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res3b4" bottom: "res3b5_branch2c" top: "res3b5" name: "res3b5" type: "Eltwise" } layer { bottom: "res3b5" top: "res3b5" name: "res3b5_relu" type: "ReLU" } layer { bottom: "res3b5" top: "res3b6_branch2a" name: "res3b6_branch2a" type: "Convolution" convolution_param { num_output: 128 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res3b6_branch2a" top: "res3b6_branch2a" name: "bn3b6_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3b6_branch2a" top: "res3b6_branch2a" name: "scale3b6_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res3b6_branch2a" bottom: "res3b6_branch2a" name: "res3b6_branch2a_relu" type: "ReLU" } layer { bottom: "res3b6_branch2a" top: "res3b6_branch2b" name: "res3b6_branch2b" type: "Convolution" convolution_param { num_output: 128 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res3b6_branch2b" top: "res3b6_branch2b" name: "bn3b6_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3b6_branch2b" top: "res3b6_branch2b" name: "scale3b6_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res3b6_branch2b" bottom: "res3b6_branch2b" name: "res3b6_branch2b_relu" type: "ReLU" } layer { bottom: "res3b6_branch2b" top: "res3b6_branch2c" name: "res3b6_branch2c" type: "Convolution" convolution_param { num_output: 512 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res3b6_branch2c" top: "res3b6_branch2c" name: "bn3b6_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3b6_branch2c" top: "res3b6_branch2c" name: "scale3b6_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res3b5" bottom: "res3b6_branch2c" top: "res3b6" name: "res3b6" type: "Eltwise" } layer { bottom: "res3b6" top: "res3b6" name: "res3b6_relu" type: "ReLU" } layer { bottom: "res3b6" top: "res3b7_branch2a" name: "res3b7_branch2a" type: "Convolution" convolution_param { num_output: 128 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res3b7_branch2a" top: "res3b7_branch2a" name: "bn3b7_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3b7_branch2a" top: "res3b7_branch2a" name: "scale3b7_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res3b7_branch2a" bottom: "res3b7_branch2a" name: "res3b7_branch2a_relu" type: "ReLU" } layer { bottom: "res3b7_branch2a" top: "res3b7_branch2b" name: "res3b7_branch2b" type: "Convolution" convolution_param { num_output: 128 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res3b7_branch2b" top: "res3b7_branch2b" name: "bn3b7_branch2b" type: "BatchNorm" batch_norm_param { 
use_global_stats: true } } layer { bottom: "res3b7_branch2b" top: "res3b7_branch2b" name: "scale3b7_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res3b7_branch2b" bottom: "res3b7_branch2b" name: "res3b7_branch2b_relu" type: "ReLU" } layer { bottom: "res3b7_branch2b" top: "res3b7_branch2c" name: "res3b7_branch2c" type: "Convolution" convolution_param { num_output: 512 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res3b7_branch2c" top: "res3b7_branch2c" name: "bn3b7_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3b7_branch2c" top: "res3b7_branch2c" name: "scale3b7_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res3b6" bottom: "res3b7_branch2c" top: "res3b7" name: "res3b7" type: "Eltwise" } layer { bottom: "res3b7" top: "res3b7" name: "res3b7_relu" type: "ReLU" } layer { bottom: "res3b7" top: "res4a_branch1" name: "res4a_branch1" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 2 bias_term: false } } layer { bottom: "res4a_branch1" top: "res4a_branch1" name: "bn4a_branch1" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4a_branch1" top: "res4a_branch1" name: "scale4a_branch1" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res3b7" top: "res4a_branch2a" name: "res4a_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 2 bias_term: false } } layer { bottom: "res4a_branch2a" top: "res4a_branch2a" name: "bn4a_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4a_branch2a" top: "res4a_branch2a" name: "scale4a_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4a_branch2a" bottom: "res4a_branch2a" name: "res4a_branch2a_relu" type: "ReLU" } layer { bottom: "res4a_branch2a" top: "res4a_branch2b" name: "res4a_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4a_branch2b" top: "res4a_branch2b" name: "bn4a_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4a_branch2b" top: "res4a_branch2b" name: "scale4a_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4a_branch2b" bottom: "res4a_branch2b" name: "res4a_branch2b_relu" type: "ReLU" } layer { bottom: "res4a_branch2b" top: "res4a_branch2c" name: "res4a_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4a_branch2c" top: "res4a_branch2c" name: "bn4a_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4a_branch2c" top: "res4a_branch2c" name: "scale4a_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4a_branch1" bottom: "res4a_branch2c" top: "res4a" name: "res4a" type: "Eltwise" } layer { bottom: "res4a" top: "res4a" name: "res4a_relu" type: "ReLU" } layer { bottom: "res4a" top: "res4b1_branch2a" name: "res4b1_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b1_branch2a" top: "res4b1_branch2a" name: "bn4b1_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b1_branch2a" top: "res4b1_branch2a" name: "scale4b1_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b1_branch2a" 
bottom: "res4b1_branch2a" name: "res4b1_branch2a_relu" type: "ReLU" } layer { bottom: "res4b1_branch2a" top: "res4b1_branch2b" name: "res4b1_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b1_branch2b" top: "res4b1_branch2b" name: "bn4b1_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b1_branch2b" top: "res4b1_branch2b" name: "scale4b1_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b1_branch2b" bottom: "res4b1_branch2b" name: "res4b1_branch2b_relu" type: "ReLU" } layer { bottom: "res4b1_branch2b" top: "res4b1_branch2c" name: "res4b1_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b1_branch2c" top: "res4b1_branch2c" name: "bn4b1_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b1_branch2c" top: "res4b1_branch2c" name: "scale4b1_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4a" bottom: "res4b1_branch2c" top: "res4b1" name: "res4b1" type: "Eltwise" } layer { bottom: "res4b1" top: "res4b1" name: "res4b1_relu" type: "ReLU" } layer { bottom: "res4b1" top: "res4b2_branch2a" name: "res4b2_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b2_branch2a" top: "res4b2_branch2a" name: "bn4b2_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b2_branch2a" top: "res4b2_branch2a" name: "scale4b2_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b2_branch2a" bottom: "res4b2_branch2a" name: "res4b2_branch2a_relu" type: "ReLU" } layer { bottom: "res4b2_branch2a" top: "res4b2_branch2b" name: "res4b2_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b2_branch2b" top: "res4b2_branch2b" name: "bn4b2_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b2_branch2b" top: "res4b2_branch2b" name: "scale4b2_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b2_branch2b" bottom: "res4b2_branch2b" name: "res4b2_branch2b_relu" type: "ReLU" } layer { bottom: "res4b2_branch2b" top: "res4b2_branch2c" name: "res4b2_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b2_branch2c" top: "res4b2_branch2c" name: "bn4b2_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b2_branch2c" top: "res4b2_branch2c" name: "scale4b2_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b1" bottom: "res4b2_branch2c" top: "res4b2" name: "res4b2" type: "Eltwise" } layer { bottom: "res4b2" top: "res4b2" name: "res4b2_relu" type: "ReLU" } layer { bottom: "res4b2" top: "res4b3_branch2a" name: "res4b3_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b3_branch2a" top: "res4b3_branch2a" name: "bn4b3_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b3_branch2a" top: "res4b3_branch2a" name: "scale4b3_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b3_branch2a" bottom: "res4b3_branch2a" name: 
"res4b3_branch2a_relu" type: "ReLU" } layer { bottom: "res4b3_branch2a" top: "res4b3_branch2b" name: "res4b3_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b3_branch2b" top: "res4b3_branch2b" name: "bn4b3_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b3_branch2b" top: "res4b3_branch2b" name: "scale4b3_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b3_branch2b" bottom: "res4b3_branch2b" name: "res4b3_branch2b_relu" type: "ReLU" } layer { bottom: "res4b3_branch2b" top: "res4b3_branch2c" name: "res4b3_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b3_branch2c" top: "res4b3_branch2c" name: "bn4b3_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b3_branch2c" top: "res4b3_branch2c" name: "scale4b3_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b2" bottom: "res4b3_branch2c" top: "res4b3" name: "res4b3" type: "Eltwise" } layer { bottom: "res4b3" top: "res4b3" name: "res4b3_relu" type: "ReLU" } layer { bottom: "res4b3" top: "res4b4_branch2a" name: "res4b4_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b4_branch2a" top: "res4b4_branch2a" name: "bn4b4_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b4_branch2a" top: "res4b4_branch2a" name: "scale4b4_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b4_branch2a" bottom: "res4b4_branch2a" name: "res4b4_branch2a_relu" type: "ReLU" } layer { bottom: "res4b4_branch2a" top: "res4b4_branch2b" name: "res4b4_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b4_branch2b" top: "res4b4_branch2b" name: "bn4b4_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b4_branch2b" top: "res4b4_branch2b" name: "scale4b4_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b4_branch2b" bottom: "res4b4_branch2b" name: "res4b4_branch2b_relu" type: "ReLU" } layer { bottom: "res4b4_branch2b" top: "res4b4_branch2c" name: "res4b4_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b4_branch2c" top: "res4b4_branch2c" name: "bn4b4_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b4_branch2c" top: "res4b4_branch2c" name: "scale4b4_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b3" bottom: "res4b4_branch2c" top: "res4b4" name: "res4b4" type: "Eltwise" } layer { bottom: "res4b4" top: "res4b4" name: "res4b4_relu" type: "ReLU" } layer { bottom: "res4b4" top: "res4b5_branch2a" name: "res4b5_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b5_branch2a" top: "res4b5_branch2a" name: "bn4b5_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b5_branch2a" top: "res4b5_branch2a" name: "scale4b5_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b5_branch2a" bottom: "res4b5_branch2a" name: "res4b5_branch2a_relu" type: 
"ReLU" } layer { bottom: "res4b5_branch2a" top: "res4b5_branch2b" name: "res4b5_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b5_branch2b" top: "res4b5_branch2b" name: "bn4b5_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b5_branch2b" top: "res4b5_branch2b" name: "scale4b5_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b5_branch2b" bottom: "res4b5_branch2b" name: "res4b5_branch2b_relu" type: "ReLU" } layer { bottom: "res4b5_branch2b" top: "res4b5_branch2c" name: "res4b5_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b5_branch2c" top: "res4b5_branch2c" name: "bn4b5_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b5_branch2c" top: "res4b5_branch2c" name: "scale4b5_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b4" bottom: "res4b5_branch2c" top: "res4b5" name: "res4b5" type: "Eltwise" } layer { bottom: "res4b5" top: "res4b5" name: "res4b5_relu" type: "ReLU" } layer { bottom: "res4b5" top: "res4b6_branch2a" name: "res4b6_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b6_branch2a" top: "res4b6_branch2a" name: "bn4b6_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b6_branch2a" top: "res4b6_branch2a" name: "scale4b6_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b6_branch2a" bottom: "res4b6_branch2a" name: "res4b6_branch2a_relu" type: "ReLU" } layer { bottom: "res4b6_branch2a" top: "res4b6_branch2b" name: "res4b6_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b6_branch2b" top: "res4b6_branch2b" name: "bn4b6_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b6_branch2b" top: "res4b6_branch2b" name: "scale4b6_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b6_branch2b" bottom: "res4b6_branch2b" name: "res4b6_branch2b_relu" type: "ReLU" } layer { bottom: "res4b6_branch2b" top: "res4b6_branch2c" name: "res4b6_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b6_branch2c" top: "res4b6_branch2c" name: "bn4b6_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b6_branch2c" top: "res4b6_branch2c" name: "scale4b6_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b5" bottom: "res4b6_branch2c" top: "res4b6" name: "res4b6" type: "Eltwise" } layer { bottom: "res4b6" top: "res4b6" name: "res4b6_relu" type: "ReLU" } layer { bottom: "res4b6" top: "res4b7_branch2a" name: "res4b7_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b7_branch2a" top: "res4b7_branch2a" name: "bn4b7_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b7_branch2a" top: "res4b7_branch2a" name: "scale4b7_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b7_branch2a" bottom: "res4b7_branch2a" name: "res4b7_branch2a_relu" type: "ReLU" } layer { bottom: 
"res4b7_branch2a" top: "res4b7_branch2b" name: "res4b7_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b7_branch2b" top: "res4b7_branch2b" name: "bn4b7_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b7_branch2b" top: "res4b7_branch2b" name: "scale4b7_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b7_branch2b" bottom: "res4b7_branch2b" name: "res4b7_branch2b_relu" type: "ReLU" } layer { bottom: "res4b7_branch2b" top: "res4b7_branch2c" name: "res4b7_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b7_branch2c" top: "res4b7_branch2c" name: "bn4b7_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b7_branch2c" top: "res4b7_branch2c" name: "scale4b7_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b6" bottom: "res4b7_branch2c" top: "res4b7" name: "res4b7" type: "Eltwise" } layer { bottom: "res4b7" top: "res4b7" name: "res4b7_relu" type: "ReLU" } layer { bottom: "res4b7" top: "res4b8_branch2a" name: "res4b8_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b8_branch2a" top: "res4b8_branch2a" name: "bn4b8_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b8_branch2a" top: "res4b8_branch2a" name: "scale4b8_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b8_branch2a" bottom: "res4b8_branch2a" name: "res4b8_branch2a_relu" type: "ReLU" } layer { bottom: "res4b8_branch2a" top: "res4b8_branch2b" name: "res4b8_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b8_branch2b" top: "res4b8_branch2b" name: "bn4b8_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b8_branch2b" top: "res4b8_branch2b" name: "scale4b8_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b8_branch2b" bottom: "res4b8_branch2b" name: "res4b8_branch2b_relu" type: "ReLU" } layer { bottom: "res4b8_branch2b" top: "res4b8_branch2c" name: "res4b8_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b8_branch2c" top: "res4b8_branch2c" name: "bn4b8_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b8_branch2c" top: "res4b8_branch2c" name: "scale4b8_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b7" bottom: "res4b8_branch2c" top: "res4b8" name: "res4b8" type: "Eltwise" } layer { bottom: "res4b8" top: "res4b8" name: "res4b8_relu" type: "ReLU" } layer { bottom: "res4b8" top: "res4b9_branch2a" name: "res4b9_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b9_branch2a" top: "res4b9_branch2a" name: "bn4b9_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b9_branch2a" top: "res4b9_branch2a" name: "scale4b9_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b9_branch2a" bottom: "res4b9_branch2a" name: "res4b9_branch2a_relu" type: "ReLU" } layer { bottom: "res4b9_branch2a" top: 
"res4b9_branch2b" name: "res4b9_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b9_branch2b" top: "res4b9_branch2b" name: "bn4b9_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b9_branch2b" top: "res4b9_branch2b" name: "scale4b9_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b9_branch2b" bottom: "res4b9_branch2b" name: "res4b9_branch2b_relu" type: "ReLU" } layer { bottom: "res4b9_branch2b" top: "res4b9_branch2c" name: "res4b9_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b9_branch2c" top: "res4b9_branch2c" name: "bn4b9_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b9_branch2c" top: "res4b9_branch2c" name: "scale4b9_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b8" bottom: "res4b9_branch2c" top: "res4b9" name: "res4b9" type: "Eltwise" } layer { bottom: "res4b9" top: "res4b9" name: "res4b9_relu" type: "ReLU" } layer { bottom: "res4b9" top: "res4b10_branch2a" name: "res4b10_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b10_branch2a" top: "res4b10_branch2a" name: "bn4b10_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b10_branch2a" top: "res4b10_branch2a" name: "scale4b10_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b10_branch2a" bottom: "res4b10_branch2a" name: "res4b10_branch2a_relu" type: "ReLU" } layer { bottom: "res4b10_branch2a" top: "res4b10_branch2b" name: "res4b10_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b10_branch2b" top: "res4b10_branch2b" name: "bn4b10_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b10_branch2b" top: "res4b10_branch2b" name: "scale4b10_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b10_branch2b" bottom: "res4b10_branch2b" name: "res4b10_branch2b_relu" type: "ReLU" } layer { bottom: "res4b10_branch2b" top: "res4b10_branch2c" name: "res4b10_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b10_branch2c" top: "res4b10_branch2c" name: "bn4b10_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b10_branch2c" top: "res4b10_branch2c" name: "scale4b10_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b9" bottom: "res4b10_branch2c" top: "res4b10" name: "res4b10" type: "Eltwise" } layer { bottom: "res4b10" top: "res4b10" name: "res4b10_relu" type: "ReLU" } layer { bottom: "res4b10" top: "res4b11_branch2a" name: "res4b11_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b11_branch2a" top: "res4b11_branch2a" name: "bn4b11_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b11_branch2a" top: "res4b11_branch2a" name: "scale4b11_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b11_branch2a" bottom: "res4b11_branch2a" name: "res4b11_branch2a_relu" type: "ReLU" } layer { bottom: 
"res4b11_branch2a" top: "res4b11_branch2b" name: "res4b11_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b11_branch2b" top: "res4b11_branch2b" name: "bn4b11_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b11_branch2b" top: "res4b11_branch2b" name: "scale4b11_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b11_branch2b" bottom: "res4b11_branch2b" name: "res4b11_branch2b_relu" type: "ReLU" } layer { bottom: "res4b11_branch2b" top: "res4b11_branch2c" name: "res4b11_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b11_branch2c" top: "res4b11_branch2c" name: "bn4b11_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b11_branch2c" top: "res4b11_branch2c" name: "scale4b11_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b10" bottom: "res4b11_branch2c" top: "res4b11" name: "res4b11" type: "Eltwise" } layer { bottom: "res4b11" top: "res4b11" name: "res4b11_relu" type: "ReLU" } layer { bottom: "res4b11" top: "res4b12_branch2a" name: "res4b12_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b12_branch2a" top: "res4b12_branch2a" name: "bn4b12_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b12_branch2a" top: "res4b12_branch2a" name: "scale4b12_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b12_branch2a" bottom: "res4b12_branch2a" name: "res4b12_branch2a_relu" type: "ReLU" } layer { bottom: "res4b12_branch2a" top: "res4b12_branch2b" name: "res4b12_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b12_branch2b" top: "res4b12_branch2b" name: "bn4b12_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b12_branch2b" top: "res4b12_branch2b" name: "scale4b12_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b12_branch2b" bottom: "res4b12_branch2b" name: "res4b12_branch2b_relu" type: "ReLU" } layer { bottom: "res4b12_branch2b" top: "res4b12_branch2c" name: "res4b12_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b12_branch2c" top: "res4b12_branch2c" name: "bn4b12_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b12_branch2c" top: "res4b12_branch2c" name: "scale4b12_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b11" bottom: "res4b12_branch2c" top: "res4b12" name: "res4b12" type: "Eltwise" } layer { bottom: "res4b12" top: "res4b12" name: "res4b12_relu" type: "ReLU" } layer { bottom: "res4b12" top: "res4b13_branch2a" name: "res4b13_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b13_branch2a" top: "res4b13_branch2a" name: "bn4b13_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b13_branch2a" top: "res4b13_branch2a" name: "scale4b13_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b13_branch2a" bottom: "res4b13_branch2a" name: 
"res4b13_branch2a_relu" type: "ReLU" } layer { bottom: "res4b13_branch2a" top: "res4b13_branch2b" name: "res4b13_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b13_branch2b" top: "res4b13_branch2b" name: "bn4b13_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b13_branch2b" top: "res4b13_branch2b" name: "scale4b13_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b13_branch2b" bottom: "res4b13_branch2b" name: "res4b13_branch2b_relu" type: "ReLU" } layer { bottom: "res4b13_branch2b" top: "res4b13_branch2c" name: "res4b13_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b13_branch2c" top: "res4b13_branch2c" name: "bn4b13_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b13_branch2c" top: "res4b13_branch2c" name: "scale4b13_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b12" bottom: "res4b13_branch2c" top: "res4b13" name: "res4b13" type: "Eltwise" } layer { bottom: "res4b13" top: "res4b13" name: "res4b13_relu" type: "ReLU" } layer { bottom: "res4b13" top: "res4b14_branch2a" name: "res4b14_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b14_branch2a" top: "res4b14_branch2a" name: "bn4b14_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b14_branch2a" top: "res4b14_branch2a" name: "scale4b14_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b14_branch2a" bottom: "res4b14_branch2a" name: "res4b14_branch2a_relu" type: "ReLU" } layer { bottom: "res4b14_branch2a" top: "res4b14_branch2b" name: "res4b14_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b14_branch2b" top: "res4b14_branch2b" name: "bn4b14_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b14_branch2b" top: "res4b14_branch2b" name: "scale4b14_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b14_branch2b" bottom: "res4b14_branch2b" name: "res4b14_branch2b_relu" type: "ReLU" } layer { bottom: "res4b14_branch2b" top: "res4b14_branch2c" name: "res4b14_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b14_branch2c" top: "res4b14_branch2c" name: "bn4b14_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b14_branch2c" top: "res4b14_branch2c" name: "scale4b14_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b13" bottom: "res4b14_branch2c" top: "res4b14" name: "res4b14" type: "Eltwise" } layer { bottom: "res4b14" top: "res4b14" name: "res4b14_relu" type: "ReLU" } layer { bottom: "res4b14" top: "res4b15_branch2a" name: "res4b15_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b15_branch2a" top: "res4b15_branch2a" name: "bn4b15_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b15_branch2a" top: "res4b15_branch2a" name: "scale4b15_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: 
"res4b15_branch2a" bottom: "res4b15_branch2a" name: "res4b15_branch2a_relu" type: "ReLU" } layer { bottom: "res4b15_branch2a" top: "res4b15_branch2b" name: "res4b15_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b15_branch2b" top: "res4b15_branch2b" name: "bn4b15_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b15_branch2b" top: "res4b15_branch2b" name: "scale4b15_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b15_branch2b" bottom: "res4b15_branch2b" name: "res4b15_branch2b_relu" type: "ReLU" } layer { bottom: "res4b15_branch2b" top: "res4b15_branch2c" name: "res4b15_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b15_branch2c" top: "res4b15_branch2c" name: "bn4b15_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b15_branch2c" top: "res4b15_branch2c" name: "scale4b15_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b14" bottom: "res4b15_branch2c" top: "res4b15" name: "res4b15" type: "Eltwise" } layer { bottom: "res4b15" top: "res4b15" name: "res4b15_relu" type: "ReLU" } layer { bottom: "res4b15" top: "res4b16_branch2a" name: "res4b16_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b16_branch2a" top: "res4b16_branch2a" name: "bn4b16_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b16_branch2a" top: "res4b16_branch2a" name: "scale4b16_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b16_branch2a" bottom: "res4b16_branch2a" name: "res4b16_branch2a_relu" type: "ReLU" } layer { bottom: "res4b16_branch2a" top: "res4b16_branch2b" name: "res4b16_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b16_branch2b" top: "res4b16_branch2b" name: "bn4b16_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b16_branch2b" top: "res4b16_branch2b" name: "scale4b16_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b16_branch2b" bottom: "res4b16_branch2b" name: "res4b16_branch2b_relu" type: "ReLU" } layer { bottom: "res4b16_branch2b" top: "res4b16_branch2c" name: "res4b16_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b16_branch2c" top: "res4b16_branch2c" name: "bn4b16_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b16_branch2c" top: "res4b16_branch2c" name: "scale4b16_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b15" bottom: "res4b16_branch2c" top: "res4b16" name: "res4b16" type: "Eltwise" } layer { bottom: "res4b16" top: "res4b16" name: "res4b16_relu" type: "ReLU" } layer { bottom: "res4b16" top: "res4b17_branch2a" name: "res4b17_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b17_branch2a" top: "res4b17_branch2a" name: "bn4b17_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b17_branch2a" top: "res4b17_branch2a" name: "scale4b17_branch2a" type: "Scale" 
scale_param { bias_term: true } } layer { top: "res4b17_branch2a" bottom: "res4b17_branch2a" name: "res4b17_branch2a_relu" type: "ReLU" } layer { bottom: "res4b17_branch2a" top: "res4b17_branch2b" name: "res4b17_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b17_branch2b" top: "res4b17_branch2b" name: "bn4b17_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b17_branch2b" top: "res4b17_branch2b" name: "scale4b17_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b17_branch2b" bottom: "res4b17_branch2b" name: "res4b17_branch2b_relu" type: "ReLU" } layer { bottom: "res4b17_branch2b" top: "res4b17_branch2c" name: "res4b17_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b17_branch2c" top: "res4b17_branch2c" name: "bn4b17_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b17_branch2c" top: "res4b17_branch2c" name: "scale4b17_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b16" bottom: "res4b17_branch2c" top: "res4b17" name: "res4b17" type: "Eltwise" } layer { bottom: "res4b17" top: "res4b17" name: "res4b17_relu" type: "ReLU" } layer { bottom: "res4b17" top: "res4b18_branch2a" name: "res4b18_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b18_branch2a" top: "res4b18_branch2a" name: "bn4b18_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b18_branch2a" top: "res4b18_branch2a" name: "scale4b18_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b18_branch2a" bottom: "res4b18_branch2a" name: "res4b18_branch2a_relu" type: "ReLU" } layer { bottom: "res4b18_branch2a" top: "res4b18_branch2b" name: "res4b18_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b18_branch2b" top: "res4b18_branch2b" name: "bn4b18_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b18_branch2b" top: "res4b18_branch2b" name: "scale4b18_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b18_branch2b" bottom: "res4b18_branch2b" name: "res4b18_branch2b_relu" type: "ReLU" } layer { bottom: "res4b18_branch2b" top: "res4b18_branch2c" name: "res4b18_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b18_branch2c" top: "res4b18_branch2c" name: "bn4b18_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b18_branch2c" top: "res4b18_branch2c" name: "scale4b18_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b17" bottom: "res4b18_branch2c" top: "res4b18" name: "res4b18" type: "Eltwise" } layer { bottom: "res4b18" top: "res4b18" name: "res4b18_relu" type: "ReLU" } layer { bottom: "res4b18" top: "res4b19_branch2a" name: "res4b19_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b19_branch2a" top: "res4b19_branch2a" name: "bn4b19_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b19_branch2a" top: 
"res4b19_branch2a" name: "scale4b19_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b19_branch2a" bottom: "res4b19_branch2a" name: "res4b19_branch2a_relu" type: "ReLU" } layer { bottom: "res4b19_branch2a" top: "res4b19_branch2b" name: "res4b19_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b19_branch2b" top: "res4b19_branch2b" name: "bn4b19_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b19_branch2b" top: "res4b19_branch2b" name: "scale4b19_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b19_branch2b" bottom: "res4b19_branch2b" name: "res4b19_branch2b_relu" type: "ReLU" } layer { bottom: "res4b19_branch2b" top: "res4b19_branch2c" name: "res4b19_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b19_branch2c" top: "res4b19_branch2c" name: "bn4b19_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b19_branch2c" top: "res4b19_branch2c" name: "scale4b19_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b18" bottom: "res4b19_branch2c" top: "res4b19" name: "res4b19" type: "Eltwise" } layer { bottom: "res4b19" top: "res4b19" name: "res4b19_relu" type: "ReLU" } layer { bottom: "res4b19" top: "res4b20_branch2a" name: "res4b20_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b20_branch2a" top: "res4b20_branch2a" name: "bn4b20_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b20_branch2a" top: "res4b20_branch2a" name: "scale4b20_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b20_branch2a" bottom: "res4b20_branch2a" name: "res4b20_branch2a_relu" type: "ReLU" } layer { bottom: "res4b20_branch2a" top: "res4b20_branch2b" name: "res4b20_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b20_branch2b" top: "res4b20_branch2b" name: "bn4b20_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b20_branch2b" top: "res4b20_branch2b" name: "scale4b20_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b20_branch2b" bottom: "res4b20_branch2b" name: "res4b20_branch2b_relu" type: "ReLU" } layer { bottom: "res4b20_branch2b" top: "res4b20_branch2c" name: "res4b20_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b20_branch2c" top: "res4b20_branch2c" name: "bn4b20_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b20_branch2c" top: "res4b20_branch2c" name: "scale4b20_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b19" bottom: "res4b20_branch2c" top: "res4b20" name: "res4b20" type: "Eltwise" } layer { bottom: "res4b20" top: "res4b20" name: "res4b20_relu" type: "ReLU" } layer { bottom: "res4b20" top: "res4b21_branch2a" name: "res4b21_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b21_branch2a" top: "res4b21_branch2a" name: "bn4b21_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: 
true } } layer { bottom: "res4b21_branch2a" top: "res4b21_branch2a" name: "scale4b21_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b21_branch2a" bottom: "res4b21_branch2a" name: "res4b21_branch2a_relu" type: "ReLU" } layer { bottom: "res4b21_branch2a" top: "res4b21_branch2b" name: "res4b21_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b21_branch2b" top: "res4b21_branch2b" name: "bn4b21_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b21_branch2b" top: "res4b21_branch2b" name: "scale4b21_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b21_branch2b" bottom: "res4b21_branch2b" name: "res4b21_branch2b_relu" type: "ReLU" } layer { bottom: "res4b21_branch2b" top: "res4b21_branch2c" name: "res4b21_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b21_branch2c" top: "res4b21_branch2c" name: "bn4b21_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b21_branch2c" top: "res4b21_branch2c" name: "scale4b21_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b20" bottom: "res4b21_branch2c" top: "res4b21" name: "res4b21" type: "Eltwise" } layer { bottom: "res4b21" top: "res4b21" name: "res4b21_relu" type: "ReLU" } layer { bottom: "res4b21" top: "res4b22_branch2a" name: "res4b22_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b22_branch2a" top: "res4b22_branch2a" name: "bn4b22_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b22_branch2a" top: "res4b22_branch2a" name: "scale4b22_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b22_branch2a" bottom: "res4b22_branch2a" name: "res4b22_branch2a_relu" type: "ReLU" } layer { bottom: "res4b22_branch2a" top: "res4b22_branch2b" name: "res4b22_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b22_branch2b" top: "res4b22_branch2b" name: "bn4b22_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b22_branch2b" top: "res4b22_branch2b" name: "scale4b22_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b22_branch2b" bottom: "res4b22_branch2b" name: "res4b22_branch2b_relu" type: "ReLU" } layer { bottom: "res4b22_branch2b" top: "res4b22_branch2c" name: "res4b22_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b22_branch2c" top: "res4b22_branch2c" name: "bn4b22_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b22_branch2c" top: "res4b22_branch2c" name: "scale4b22_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b21" bottom: "res4b22_branch2c" top: "res4b22" name: "res4b22" type: "Eltwise" } layer { bottom: "res4b22" top: "res4b22" name: "res4b22_relu" type: "ReLU" } layer { bottom: "res4b22" top: "res4b23_branch2a" name: "res4b23_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b23_branch2a" top: "res4b23_branch2a" name: "bn4b23_branch2a" type: 
"BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b23_branch2a" top: "res4b23_branch2a" name: "scale4b23_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b23_branch2a" bottom: "res4b23_branch2a" name: "res4b23_branch2a_relu" type: "ReLU" } layer { bottom: "res4b23_branch2a" top: "res4b23_branch2b" name: "res4b23_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b23_branch2b" top: "res4b23_branch2b" name: "bn4b23_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b23_branch2b" top: "res4b23_branch2b" name: "scale4b23_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b23_branch2b" bottom: "res4b23_branch2b" name: "res4b23_branch2b_relu" type: "ReLU" } layer { bottom: "res4b23_branch2b" top: "res4b23_branch2c" name: "res4b23_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b23_branch2c" top: "res4b23_branch2c" name: "bn4b23_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b23_branch2c" top: "res4b23_branch2c" name: "scale4b23_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b22" bottom: "res4b23_branch2c" top: "res4b23" name: "res4b23" type: "Eltwise" } layer { bottom: "res4b23" top: "res4b23" name: "res4b23_relu" type: "ReLU" } layer { bottom: "res4b23" top: "res4b24_branch2a" name: "res4b24_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b24_branch2a" top: "res4b24_branch2a" name: "bn4b24_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b24_branch2a" top: "res4b24_branch2a" name: "scale4b24_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b24_branch2a" bottom: "res4b24_branch2a" name: "res4b24_branch2a_relu" type: "ReLU" } layer { bottom: "res4b24_branch2a" top: "res4b24_branch2b" name: "res4b24_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b24_branch2b" top: "res4b24_branch2b" name: "bn4b24_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b24_branch2b" top: "res4b24_branch2b" name: "scale4b24_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b24_branch2b" bottom: "res4b24_branch2b" name: "res4b24_branch2b_relu" type: "ReLU" } layer { bottom: "res4b24_branch2b" top: "res4b24_branch2c" name: "res4b24_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b24_branch2c" top: "res4b24_branch2c" name: "bn4b24_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b24_branch2c" top: "res4b24_branch2c" name: "scale4b24_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b23" bottom: "res4b24_branch2c" top: "res4b24" name: "res4b24" type: "Eltwise" } layer { bottom: "res4b24" top: "res4b24" name: "res4b24_relu" type: "ReLU" } layer { bottom: "res4b24" top: "res4b25_branch2a" name: "res4b25_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b25_branch2a" top: 
"res4b25_branch2a" name: "bn4b25_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b25_branch2a" top: "res4b25_branch2a" name: "scale4b25_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b25_branch2a" bottom: "res4b25_branch2a" name: "res4b25_branch2a_relu" type: "ReLU" } layer { bottom: "res4b25_branch2a" top: "res4b25_branch2b" name: "res4b25_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b25_branch2b" top: "res4b25_branch2b" name: "bn4b25_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b25_branch2b" top: "res4b25_branch2b" name: "scale4b25_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b25_branch2b" bottom: "res4b25_branch2b" name: "res4b25_branch2b_relu" type: "ReLU" } layer { bottom: "res4b25_branch2b" top: "res4b25_branch2c" name: "res4b25_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b25_branch2c" top: "res4b25_branch2c" name: "bn4b25_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b25_branch2c" top: "res4b25_branch2c" name: "scale4b25_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b24" bottom: "res4b25_branch2c" top: "res4b25" name: "res4b25" type: "Eltwise" } layer { bottom: "res4b25" top: "res4b25" name: "res4b25_relu" type: "ReLU" } layer { bottom: "res4b25" top: "res4b26_branch2a" name: "res4b26_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b26_branch2a" top: "res4b26_branch2a" name: "bn4b26_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b26_branch2a" top: "res4b26_branch2a" name: "scale4b26_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b26_branch2a" bottom: "res4b26_branch2a" name: "res4b26_branch2a_relu" type: "ReLU" } layer { bottom: "res4b26_branch2a" top: "res4b26_branch2b" name: "res4b26_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b26_branch2b" top: "res4b26_branch2b" name: "bn4b26_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b26_branch2b" top: "res4b26_branch2b" name: "scale4b26_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b26_branch2b" bottom: "res4b26_branch2b" name: "res4b26_branch2b_relu" type: "ReLU" } layer { bottom: "res4b26_branch2b" top: "res4b26_branch2c" name: "res4b26_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b26_branch2c" top: "res4b26_branch2c" name: "bn4b26_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b26_branch2c" top: "res4b26_branch2c" name: "scale4b26_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b25" bottom: "res4b26_branch2c" top: "res4b26" name: "res4b26" type: "Eltwise" } layer { bottom: "res4b26" top: "res4b26" name: "res4b26_relu" type: "ReLU" } layer { bottom: "res4b26" top: "res4b27_branch2a" name: "res4b27_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: 
false } } layer { bottom: "res4b27_branch2a" top: "res4b27_branch2a" name: "bn4b27_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b27_branch2a" top: "res4b27_branch2a" name: "scale4b27_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b27_branch2a" bottom: "res4b27_branch2a" name: "res4b27_branch2a_relu" type: "ReLU" } layer { bottom: "res4b27_branch2a" top: "res4b27_branch2b" name: "res4b27_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b27_branch2b" top: "res4b27_branch2b" name: "bn4b27_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b27_branch2b" top: "res4b27_branch2b" name: "scale4b27_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b27_branch2b" bottom: "res4b27_branch2b" name: "res4b27_branch2b_relu" type: "ReLU" } layer { bottom: "res4b27_branch2b" top: "res4b27_branch2c" name: "res4b27_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b27_branch2c" top: "res4b27_branch2c" name: "bn4b27_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b27_branch2c" top: "res4b27_branch2c" name: "scale4b27_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b26" bottom: "res4b27_branch2c" top: "res4b27" name: "res4b27" type: "Eltwise" } layer { bottom: "res4b27" top: "res4b27" name: "res4b27_relu" type: "ReLU" } layer { bottom: "res4b27" top: "res4b28_branch2a" name: "res4b28_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b28_branch2a" top: "res4b28_branch2a" name: "bn4b28_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b28_branch2a" top: "res4b28_branch2a" name: "scale4b28_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b28_branch2a" bottom: "res4b28_branch2a" name: "res4b28_branch2a_relu" type: "ReLU" } layer { bottom: "res4b28_branch2a" top: "res4b28_branch2b" name: "res4b28_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b28_branch2b" top: "res4b28_branch2b" name: "bn4b28_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b28_branch2b" top: "res4b28_branch2b" name: "scale4b28_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b28_branch2b" bottom: "res4b28_branch2b" name: "res4b28_branch2b_relu" type: "ReLU" } layer { bottom: "res4b28_branch2b" top: "res4b28_branch2c" name: "res4b28_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b28_branch2c" top: "res4b28_branch2c" name: "bn4b28_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b28_branch2c" top: "res4b28_branch2c" name: "scale4b28_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b27" bottom: "res4b28_branch2c" top: "res4b28" name: "res4b28" type: "Eltwise" } layer { bottom: "res4b28" top: "res4b28" name: "res4b28_relu" type: "ReLU" } layer { bottom: "res4b28" top: "res4b29_branch2a" name: "res4b29_branch2a" type: "Convolution" convolution_param { num_output: 
256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b29_branch2a" top: "res4b29_branch2a" name: "bn4b29_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b29_branch2a" top: "res4b29_branch2a" name: "scale4b29_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b29_branch2a" bottom: "res4b29_branch2a" name: "res4b29_branch2a_relu" type: "ReLU" } layer { bottom: "res4b29_branch2a" top: "res4b29_branch2b" name: "res4b29_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b29_branch2b" top: "res4b29_branch2b" name: "bn4b29_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b29_branch2b" top: "res4b29_branch2b" name: "scale4b29_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b29_branch2b" bottom: "res4b29_branch2b" name: "res4b29_branch2b_relu" type: "ReLU" } layer { bottom: "res4b29_branch2b" top: "res4b29_branch2c" name: "res4b29_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b29_branch2c" top: "res4b29_branch2c" name: "bn4b29_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b29_branch2c" top: "res4b29_branch2c" name: "scale4b29_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b28" bottom: "res4b29_branch2c" top: "res4b29" name: "res4b29" type: "Eltwise" } layer { bottom: "res4b29" top: "res4b29" name: "res4b29_relu" type: "ReLU" } layer { bottom: "res4b29" top: "res4b30_branch2a" name: "res4b30_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b30_branch2a" top: "res4b30_branch2a" name: "bn4b30_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b30_branch2a" top: "res4b30_branch2a" name: "scale4b30_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b30_branch2a" bottom: "res4b30_branch2a" name: "res4b30_branch2a_relu" type: "ReLU" } layer { bottom: "res4b30_branch2a" top: "res4b30_branch2b" name: "res4b30_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b30_branch2b" top: "res4b30_branch2b" name: "bn4b30_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b30_branch2b" top: "res4b30_branch2b" name: "scale4b30_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b30_branch2b" bottom: "res4b30_branch2b" name: "res4b30_branch2b_relu" type: "ReLU" } layer { bottom: "res4b30_branch2b" top: "res4b30_branch2c" name: "res4b30_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b30_branch2c" top: "res4b30_branch2c" name: "bn4b30_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b30_branch2c" top: "res4b30_branch2c" name: "scale4b30_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b29" bottom: "res4b30_branch2c" top: "res4b30" name: "res4b30" type: "Eltwise" } layer { bottom: "res4b30" top: "res4b30" name: "res4b30_relu" type: "ReLU" } layer { bottom: "res4b30" top: "res4b31_branch2a" name: "res4b31_branch2a" 
type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b31_branch2a" top: "res4b31_branch2a" name: "bn4b31_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b31_branch2a" top: "res4b31_branch2a" name: "scale4b31_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b31_branch2a" bottom: "res4b31_branch2a" name: "res4b31_branch2a_relu" type: "ReLU" } layer { bottom: "res4b31_branch2a" top: "res4b31_branch2b" name: "res4b31_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b31_branch2b" top: "res4b31_branch2b" name: "bn4b31_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b31_branch2b" top: "res4b31_branch2b" name: "scale4b31_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b31_branch2b" bottom: "res4b31_branch2b" name: "res4b31_branch2b_relu" type: "ReLU" } layer { bottom: "res4b31_branch2b" top: "res4b31_branch2c" name: "res4b31_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b31_branch2c" top: "res4b31_branch2c" name: "bn4b31_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b31_branch2c" top: "res4b31_branch2c" name: "scale4b31_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b30" bottom: "res4b31_branch2c" top: "res4b31" name: "res4b31" type: "Eltwise" } layer { bottom: "res4b31" top: "res4b31" name: "res4b31_relu" type: "ReLU" } layer { bottom: "res4b31" top: "res4b32_branch2a" name: "res4b32_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b32_branch2a" top: "res4b32_branch2a" name: "bn4b32_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b32_branch2a" top: "res4b32_branch2a" name: "scale4b32_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b32_branch2a" bottom: "res4b32_branch2a" name: "res4b32_branch2a_relu" type: "ReLU" } layer { bottom: "res4b32_branch2a" top: "res4b32_branch2b" name: "res4b32_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b32_branch2b" top: "res4b32_branch2b" name: "bn4b32_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b32_branch2b" top: "res4b32_branch2b" name: "scale4b32_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b32_branch2b" bottom: "res4b32_branch2b" name: "res4b32_branch2b_relu" type: "ReLU" } layer { bottom: "res4b32_branch2b" top: "res4b32_branch2c" name: "res4b32_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b32_branch2c" top: "res4b32_branch2c" name: "bn4b32_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b32_branch2c" top: "res4b32_branch2c" name: "scale4b32_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b31" bottom: "res4b32_branch2c" top: "res4b32" name: "res4b32" type: "Eltwise" } layer { bottom: "res4b32" top: "res4b32" name: "res4b32_relu" type: "ReLU" } layer { bottom: "res4b32" 
top: "res4b33_branch2a" name: "res4b33_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b33_branch2a" top: "res4b33_branch2a" name: "bn4b33_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b33_branch2a" top: "res4b33_branch2a" name: "scale4b33_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b33_branch2a" bottom: "res4b33_branch2a" name: "res4b33_branch2a_relu" type: "ReLU" } layer { bottom: "res4b33_branch2a" top: "res4b33_branch2b" name: "res4b33_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b33_branch2b" top: "res4b33_branch2b" name: "bn4b33_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b33_branch2b" top: "res4b33_branch2b" name: "scale4b33_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b33_branch2b" bottom: "res4b33_branch2b" name: "res4b33_branch2b_relu" type: "ReLU" } layer { bottom: "res4b33_branch2b" top: "res4b33_branch2c" name: "res4b33_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b33_branch2c" top: "res4b33_branch2c" name: "bn4b33_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b33_branch2c" top: "res4b33_branch2c" name: "scale4b33_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b32" bottom: "res4b33_branch2c" top: "res4b33" name: "res4b33" type: "Eltwise" } layer { bottom: "res4b33" top: "res4b33" name: "res4b33_relu" type: "ReLU" } layer { bottom: "res4b33" top: "res4b34_branch2a" name: "res4b34_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b34_branch2a" top: "res4b34_branch2a" name: "bn4b34_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b34_branch2a" top: "res4b34_branch2a" name: "scale4b34_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b34_branch2a" bottom: "res4b34_branch2a" name: "res4b34_branch2a_relu" type: "ReLU" } layer { bottom: "res4b34_branch2a" top: "res4b34_branch2b" name: "res4b34_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b34_branch2b" top: "res4b34_branch2b" name: "bn4b34_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b34_branch2b" top: "res4b34_branch2b" name: "scale4b34_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b34_branch2b" bottom: "res4b34_branch2b" name: "res4b34_branch2b_relu" type: "ReLU" } layer { bottom: "res4b34_branch2b" top: "res4b34_branch2c" name: "res4b34_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b34_branch2c" top: "res4b34_branch2c" name: "bn4b34_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b34_branch2c" top: "res4b34_branch2c" name: "scale4b34_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b33" bottom: "res4b34_branch2c" top: "res4b34" name: "res4b34" type: "Eltwise" } layer { bottom: "res4b34" top: "res4b34" name: 
"res4b34_relu" type: "ReLU" } layer { bottom: "res4b34" top: "res4b35_branch2a" name: "res4b35_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b35_branch2a" top: "res4b35_branch2a" name: "bn4b35_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b35_branch2a" top: "res4b35_branch2a" name: "scale4b35_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b35_branch2a" bottom: "res4b35_branch2a" name: "res4b35_branch2a_relu" type: "ReLU" } layer { bottom: "res4b35_branch2a" top: "res4b35_branch2b" name: "res4b35_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b35_branch2b" top: "res4b35_branch2b" name: "bn4b35_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b35_branch2b" top: "res4b35_branch2b" name: "scale4b35_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res4b35_branch2b" bottom: "res4b35_branch2b" name: "res4b35_branch2b_relu" type: "ReLU" } layer { bottom: "res4b35_branch2b" top: "res4b35_branch2c" name: "res4b35_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b35_branch2c" top: "res4b35_branch2c" name: "bn4b35_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b35_branch2c" top: "res4b35_branch2c" name: "scale4b35_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b34" bottom: "res4b35_branch2c" top: "res4b35" name: "res4b35" type: "Eltwise" } layer { bottom: "res4b35" top: "res4b35" name: "res4b35_relu" type: "ReLU" } layer { bottom: "res4b35" top: "res5a_branch1" name: "res5a_branch1" type: "Convolution" convolution_param { num_output: 2048 kernel_size: 1 pad: 0 stride: 2 bias_term: false } } layer { bottom: "res5a_branch1" top: "res5a_branch1" name: "bn5a_branch1" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res5a_branch1" top: "res5a_branch1" name: "scale5a_branch1" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b35" top: "res5a_branch2a" name: "res5a_branch2a" type: "Convolution" convolution_param { num_output: 512 kernel_size: 1 pad: 0 stride: 2 bias_term: false } } layer { bottom: "res5a_branch2a" top: "res5a_branch2a" name: "bn5a_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res5a_branch2a" top: "res5a_branch2a" name: "scale5a_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res5a_branch2a" bottom: "res5a_branch2a" name: "res5a_branch2a_relu" type: "ReLU" } layer { bottom: "res5a_branch2a" top: "res5a_branch2b" name: "res5a_branch2b" type: "Convolution" convolution_param { num_output: 512 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res5a_branch2b" top: "res5a_branch2b" name: "bn5a_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res5a_branch2b" top: "res5a_branch2b" name: "scale5a_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res5a_branch2b" bottom: "res5a_branch2b" name: "res5a_branch2b_relu" type: "ReLU" } layer { bottom: "res5a_branch2b" top: "res5a_branch2c" name: "res5a_branch2c" type: "Convolution" convolution_param { num_output: 2048 kernel_size: 1 pad: 0 stride: 1 bias_term: 
false } } layer { bottom: "res5a_branch2c" top: "res5a_branch2c" name: "bn5a_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res5a_branch2c" top: "res5a_branch2c" name: "scale5a_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res5a_branch1" bottom: "res5a_branch2c" top: "res5a" name: "res5a" type: "Eltwise" } layer { bottom: "res5a" top: "res5a" name: "res5a_relu" type: "ReLU" } layer { bottom: "res5a" top: "res5b_branch2a" name: "res5b_branch2a" type: "Convolution" convolution_param { num_output: 512 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res5b_branch2a" top: "res5b_branch2a" name: "bn5b_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res5b_branch2a" top: "res5b_branch2a" name: "scale5b_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res5b_branch2a" bottom: "res5b_branch2a" name: "res5b_branch2a_relu" type: "ReLU" } layer { bottom: "res5b_branch2a" top: "res5b_branch2b" name: "res5b_branch2b" type: "Convolution" convolution_param { num_output: 512 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res5b_branch2b" top: "res5b_branch2b" name: "bn5b_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res5b_branch2b" top: "res5b_branch2b" name: "scale5b_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res5b_branch2b" bottom: "res5b_branch2b" name: "res5b_branch2b_relu" type: "ReLU" } layer { bottom: "res5b_branch2b" top: "res5b_branch2c" name: "res5b_branch2c" type: "Convolution" convolution_param { num_output: 2048 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res5b_branch2c" top: "res5b_branch2c" name: "bn5b_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res5b_branch2c" top: "res5b_branch2c" name: "scale5b_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res5a" bottom: "res5b_branch2c" top: "res5b" name: "res5b" type: "Eltwise" } layer { bottom: "res5b" top: "res5b" name: "res5b_relu" type: "ReLU" } layer { bottom: "res5b" top: "res5c_branch2a" name: "res5c_branch2a" type: "Convolution" convolution_param { num_output: 512 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res5c_branch2a" top: "res5c_branch2a" name: "bn5c_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res5c_branch2a" top: "res5c_branch2a" name: "scale5c_branch2a" type: "Scale" scale_param { bias_term: true } } layer { top: "res5c_branch2a" bottom: "res5c_branch2a" name: "res5c_branch2a_relu" type: "ReLU" } layer { bottom: "res5c_branch2a" top: "res5c_branch2b" name: "res5c_branch2b" type: "Convolution" convolution_param { num_output: 512 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res5c_branch2b" top: "res5c_branch2b" name: "bn5c_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res5c_branch2b" top: "res5c_branch2b" name: "scale5c_branch2b" type: "Scale" scale_param { bias_term: true } } layer { top: "res5c_branch2b" bottom: "res5c_branch2b" name: "res5c_branch2b_relu" type: "ReLU" } layer { bottom: "res5c_branch2b" top: "res5c_branch2c" name: "res5c_branch2c" type: "Convolution" convolution_param { num_output: 2048 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res5c_branch2c" top: "res5c_branch2c" name: "bn5c_branch2c" type: "BatchNorm" 
batch_norm_param { use_global_stats: true } } layer { bottom: "res5c_branch2c" top: "res5c_branch2c" name: "scale5c_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res5b" bottom: "res5c_branch2c" top: "res5c" name: "res5c" type: "Eltwise" } layer { bottom: "res5c" top: "res5c" name: "res5c_relu" type: "ReLU" } layer { bottom: "res5c" top: "pool5" name: "pool5" type: "Pooling" pooling_param { kernel_size: 7 stride: 1 pool: AVE } } layer { bottom: "pool5" top: "fc1000" name: "fc1000" type: "InnerProduct" inner_product_param { num_output: 1000 } } layer { bottom: "fc1000" top: "prob" name: "prob" type: "Softmax" } ================================================ FILE: models/ResNet-50-deploy.prototxt ================================================ name: "ResNet-50" input: "data" input_dim: 1 input_dim: 3 input_dim: 224 input_dim: 224 layer { bottom: "data" top: "conv1" name: "conv1" type: "Convolution" convolution_param { num_output: 64 kernel_size: 7 pad: 3 stride: 2 } } layer { bottom: "conv1" top: "conv1" name: "bn_conv1" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "conv1" top: "conv1" name: "scale_conv1" type: "Scale" scale_param { bias_term: true } } layer { bottom: "conv1" top: "conv1" name: "conv1_relu" type: "ReLU" } layer { bottom: "conv1" top: "pool1" name: "pool1" type: "Pooling" pooling_param { kernel_size: 3 stride: 2 pool: MAX } } layer { bottom: "pool1" top: "res2a_branch1" name: "res2a_branch1" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res2a_branch1" top: "res2a_branch1" name: "bn2a_branch1" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res2a_branch1" top: "res2a_branch1" name: "scale2a_branch1" type: "Scale" scale_param { bias_term: true } } layer { bottom: "pool1" top: "res2a_branch2a" name: "res2a_branch2a" type: "Convolution" convolution_param { num_output: 64 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res2a_branch2a" top: "res2a_branch2a" name: "bn2a_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res2a_branch2a" top: "res2a_branch2a" name: "scale2a_branch2a" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res2a_branch2a" top: "res2a_branch2a" name: "res2a_branch2a_relu" type: "ReLU" } layer { bottom: "res2a_branch2a" top: "res2a_branch2b" name: "res2a_branch2b" type: "Convolution" convolution_param { num_output: 64 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res2a_branch2b" top: "res2a_branch2b" name: "bn2a_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res2a_branch2b" top: "res2a_branch2b" name: "scale2a_branch2b" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res2a_branch2b" top: "res2a_branch2b" name: "res2a_branch2b_relu" type: "ReLU" } layer { bottom: "res2a_branch2b" top: "res2a_branch2c" name: "res2a_branch2c" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res2a_branch2c" top: "res2a_branch2c" name: "bn2a_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res2a_branch2c" top: "res2a_branch2c" name: "scale2a_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res2a_branch1" bottom: "res2a_branch2c" top: "res2a" name: "res2a" type: "Eltwise" } layer { bottom: "res2a" top: 
"res2a" name: "res2a_relu" type: "ReLU" } layer { bottom: "res2a" top: "res2b_branch2a" name: "res2b_branch2a" type: "Convolution" convolution_param { num_output: 64 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res2b_branch2a" top: "res2b_branch2a" name: "bn2b_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res2b_branch2a" top: "res2b_branch2a" name: "scale2b_branch2a" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res2b_branch2a" top: "res2b_branch2a" name: "res2b_branch2a_relu" type: "ReLU" } layer { bottom: "res2b_branch2a" top: "res2b_branch2b" name: "res2b_branch2b" type: "Convolution" convolution_param { num_output: 64 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res2b_branch2b" top: "res2b_branch2b" name: "bn2b_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res2b_branch2b" top: "res2b_branch2b" name: "scale2b_branch2b" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res2b_branch2b" top: "res2b_branch2b" name: "res2b_branch2b_relu" type: "ReLU" } layer { bottom: "res2b_branch2b" top: "res2b_branch2c" name: "res2b_branch2c" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res2b_branch2c" top: "res2b_branch2c" name: "bn2b_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res2b_branch2c" top: "res2b_branch2c" name: "scale2b_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res2a" bottom: "res2b_branch2c" top: "res2b" name: "res2b" type: "Eltwise" } layer { bottom: "res2b" top: "res2b" name: "res2b_relu" type: "ReLU" } layer { bottom: "res2b" top: "res2c_branch2a" name: "res2c_branch2a" type: "Convolution" convolution_param { num_output: 64 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res2c_branch2a" top: "res2c_branch2a" name: "bn2c_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res2c_branch2a" top: "res2c_branch2a" name: "scale2c_branch2a" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res2c_branch2a" top: "res2c_branch2a" name: "res2c_branch2a_relu" type: "ReLU" } layer { bottom: "res2c_branch2a" top: "res2c_branch2b" name: "res2c_branch2b" type: "Convolution" convolution_param { num_output: 64 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res2c_branch2b" top: "res2c_branch2b" name: "bn2c_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res2c_branch2b" top: "res2c_branch2b" name: "scale2c_branch2b" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res2c_branch2b" top: "res2c_branch2b" name: "res2c_branch2b_relu" type: "ReLU" } layer { bottom: "res2c_branch2b" top: "res2c_branch2c" name: "res2c_branch2c" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res2c_branch2c" top: "res2c_branch2c" name: "bn2c_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res2c_branch2c" top: "res2c_branch2c" name: "scale2c_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res2b" bottom: "res2c_branch2c" top: "res2c" name: "res2c" type: "Eltwise" } layer { bottom: "res2c" top: "res2c" name: "res2c_relu" type: "ReLU" } layer { bottom: "res2c" top: "res3a_branch1" name: "res3a_branch1" type: 
"Convolution" convolution_param { num_output: 512 kernel_size: 1 pad: 0 stride: 2 bias_term: false } } layer { bottom: "res3a_branch1" top: "res3a_branch1" name: "bn3a_branch1" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3a_branch1" top: "res3a_branch1" name: "scale3a_branch1" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res2c" top: "res3a_branch2a" name: "res3a_branch2a" type: "Convolution" convolution_param { num_output: 128 kernel_size: 1 pad: 0 stride: 2 bias_term: false } } layer { bottom: "res3a_branch2a" top: "res3a_branch2a" name: "bn3a_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3a_branch2a" top: "res3a_branch2a" name: "scale3a_branch2a" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res3a_branch2a" top: "res3a_branch2a" name: "res3a_branch2a_relu" type: "ReLU" } layer { bottom: "res3a_branch2a" top: "res3a_branch2b" name: "res3a_branch2b" type: "Convolution" convolution_param { num_output: 128 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res3a_branch2b" top: "res3a_branch2b" name: "bn3a_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3a_branch2b" top: "res3a_branch2b" name: "scale3a_branch2b" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res3a_branch2b" top: "res3a_branch2b" name: "res3a_branch2b_relu" type: "ReLU" } layer { bottom: "res3a_branch2b" top: "res3a_branch2c" name: "res3a_branch2c" type: "Convolution" convolution_param { num_output: 512 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res3a_branch2c" top: "res3a_branch2c" name: "bn3a_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3a_branch2c" top: "res3a_branch2c" name: "scale3a_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res3a_branch1" bottom: "res3a_branch2c" top: "res3a" name: "res3a" type: "Eltwise" } layer { bottom: "res3a" top: "res3a" name: "res3a_relu" type: "ReLU" } layer { bottom: "res3a" top: "res3b_branch2a" name: "res3b_branch2a" type: "Convolution" convolution_param { num_output: 128 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res3b_branch2a" top: "res3b_branch2a" name: "bn3b_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3b_branch2a" top: "res3b_branch2a" name: "scale3b_branch2a" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res3b_branch2a" top: "res3b_branch2a" name: "res3b_branch2a_relu" type: "ReLU" } layer { bottom: "res3b_branch2a" top: "res3b_branch2b" name: "res3b_branch2b" type: "Convolution" convolution_param { num_output: 128 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res3b_branch2b" top: "res3b_branch2b" name: "bn3b_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3b_branch2b" top: "res3b_branch2b" name: "scale3b_branch2b" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res3b_branch2b" top: "res3b_branch2b" name: "res3b_branch2b_relu" type: "ReLU" } layer { bottom: "res3b_branch2b" top: "res3b_branch2c" name: "res3b_branch2c" type: "Convolution" convolution_param { num_output: 512 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res3b_branch2c" top: "res3b_branch2c" name: "bn3b_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3b_branch2c" 
top: "res3b_branch2c" name: "scale3b_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res3a" bottom: "res3b_branch2c" top: "res3b" name: "res3b" type: "Eltwise" } layer { bottom: "res3b" top: "res3b" name: "res3b_relu" type: "ReLU" } layer { bottom: "res3b" top: "res3c_branch2a" name: "res3c_branch2a" type: "Convolution" convolution_param { num_output: 128 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res3c_branch2a" top: "res3c_branch2a" name: "bn3c_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3c_branch2a" top: "res3c_branch2a" name: "scale3c_branch2a" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res3c_branch2a" top: "res3c_branch2a" name: "res3c_branch2a_relu" type: "ReLU" } layer { bottom: "res3c_branch2a" top: "res3c_branch2b" name: "res3c_branch2b" type: "Convolution" convolution_param { num_output: 128 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res3c_branch2b" top: "res3c_branch2b" name: "bn3c_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3c_branch2b" top: "res3c_branch2b" name: "scale3c_branch2b" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res3c_branch2b" top: "res3c_branch2b" name: "res3c_branch2b_relu" type: "ReLU" } layer { bottom: "res3c_branch2b" top: "res3c_branch2c" name: "res3c_branch2c" type: "Convolution" convolution_param { num_output: 512 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res3c_branch2c" top: "res3c_branch2c" name: "bn3c_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3c_branch2c" top: "res3c_branch2c" name: "scale3c_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res3b" bottom: "res3c_branch2c" top: "res3c" name: "res3c" type: "Eltwise" } layer { bottom: "res3c" top: "res3c" name: "res3c_relu" type: "ReLU" } layer { bottom: "res3c" top: "res3d_branch2a" name: "res3d_branch2a" type: "Convolution" convolution_param { num_output: 128 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res3d_branch2a" top: "res3d_branch2a" name: "bn3d_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3d_branch2a" top: "res3d_branch2a" name: "scale3d_branch2a" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res3d_branch2a" top: "res3d_branch2a" name: "res3d_branch2a_relu" type: "ReLU" } layer { bottom: "res3d_branch2a" top: "res3d_branch2b" name: "res3d_branch2b" type: "Convolution" convolution_param { num_output: 128 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res3d_branch2b" top: "res3d_branch2b" name: "bn3d_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3d_branch2b" top: "res3d_branch2b" name: "scale3d_branch2b" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res3d_branch2b" top: "res3d_branch2b" name: "res3d_branch2b_relu" type: "ReLU" } layer { bottom: "res3d_branch2b" top: "res3d_branch2c" name: "res3d_branch2c" type: "Convolution" convolution_param { num_output: 512 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res3d_branch2c" top: "res3d_branch2c" name: "bn3d_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res3d_branch2c" top: "res3d_branch2c" name: "scale3d_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: 
"res3c" bottom: "res3d_branch2c" top: "res3d" name: "res3d" type: "Eltwise" } layer { bottom: "res3d" top: "res3d" name: "res3d_relu" type: "ReLU" } layer { bottom: "res3d" top: "res4a_branch1" name: "res4a_branch1" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 2 bias_term: false } } layer { bottom: "res4a_branch1" top: "res4a_branch1" name: "bn4a_branch1" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4a_branch1" top: "res4a_branch1" name: "scale4a_branch1" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res3d" top: "res4a_branch2a" name: "res4a_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 2 bias_term: false } } layer { bottom: "res4a_branch2a" top: "res4a_branch2a" name: "bn4a_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4a_branch2a" top: "res4a_branch2a" name: "scale4a_branch2a" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4a_branch2a" top: "res4a_branch2a" name: "res4a_branch2a_relu" type: "ReLU" } layer { bottom: "res4a_branch2a" top: "res4a_branch2b" name: "res4a_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4a_branch2b" top: "res4a_branch2b" name: "bn4a_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4a_branch2b" top: "res4a_branch2b" name: "scale4a_branch2b" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4a_branch2b" top: "res4a_branch2b" name: "res4a_branch2b_relu" type: "ReLU" } layer { bottom: "res4a_branch2b" top: "res4a_branch2c" name: "res4a_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4a_branch2c" top: "res4a_branch2c" name: "bn4a_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4a_branch2c" top: "res4a_branch2c" name: "scale4a_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4a_branch1" bottom: "res4a_branch2c" top: "res4a" name: "res4a" type: "Eltwise" } layer { bottom: "res4a" top: "res4a" name: "res4a_relu" type: "ReLU" } layer { bottom: "res4a" top: "res4b_branch2a" name: "res4b_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b_branch2a" top: "res4b_branch2a" name: "bn4b_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b_branch2a" top: "res4b_branch2a" name: "scale4b_branch2a" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b_branch2a" top: "res4b_branch2a" name: "res4b_branch2a_relu" type: "ReLU" } layer { bottom: "res4b_branch2a" top: "res4b_branch2b" name: "res4b_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4b_branch2b" top: "res4b_branch2b" name: "bn4b_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b_branch2b" top: "res4b_branch2b" name: "scale4b_branch2b" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b_branch2b" top: "res4b_branch2b" name: "res4b_branch2b_relu" type: "ReLU" } layer { bottom: "res4b_branch2b" top: "res4b_branch2c" name: "res4b_branch2c" type: "Convolution" convolution_param { num_output: 1024 
kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4b_branch2c" top: "res4b_branch2c" name: "bn4b_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4b_branch2c" top: "res4b_branch2c" name: "scale4b_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4a" bottom: "res4b_branch2c" top: "res4b" name: "res4b" type: "Eltwise" } layer { bottom: "res4b" top: "res4b" name: "res4b_relu" type: "ReLU" } layer { bottom: "res4b" top: "res4c_branch2a" name: "res4c_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4c_branch2a" top: "res4c_branch2a" name: "bn4c_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4c_branch2a" top: "res4c_branch2a" name: "scale4c_branch2a" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4c_branch2a" top: "res4c_branch2a" name: "res4c_branch2a_relu" type: "ReLU" } layer { bottom: "res4c_branch2a" top: "res4c_branch2b" name: "res4c_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4c_branch2b" top: "res4c_branch2b" name: "bn4c_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4c_branch2b" top: "res4c_branch2b" name: "scale4c_branch2b" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4c_branch2b" top: "res4c_branch2b" name: "res4c_branch2b_relu" type: "ReLU" } layer { bottom: "res4c_branch2b" top: "res4c_branch2c" name: "res4c_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4c_branch2c" top: "res4c_branch2c" name: "bn4c_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4c_branch2c" top: "res4c_branch2c" name: "scale4c_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4b" bottom: "res4c_branch2c" top: "res4c" name: "res4c" type: "Eltwise" } layer { bottom: "res4c" top: "res4c" name: "res4c_relu" type: "ReLU" } layer { bottom: "res4c" top: "res4d_branch2a" name: "res4d_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4d_branch2a" top: "res4d_branch2a" name: "bn4d_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4d_branch2a" top: "res4d_branch2a" name: "scale4d_branch2a" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4d_branch2a" top: "res4d_branch2a" name: "res4d_branch2a_relu" type: "ReLU" } layer { bottom: "res4d_branch2a" top: "res4d_branch2b" name: "res4d_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4d_branch2b" top: "res4d_branch2b" name: "bn4d_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4d_branch2b" top: "res4d_branch2b" name: "scale4d_branch2b" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4d_branch2b" top: "res4d_branch2b" name: "res4d_branch2b_relu" type: "ReLU" } layer { bottom: "res4d_branch2b" top: "res4d_branch2c" name: "res4d_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4d_branch2c" top: "res4d_branch2c" 
name: "bn4d_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4d_branch2c" top: "res4d_branch2c" name: "scale4d_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4c" bottom: "res4d_branch2c" top: "res4d" name: "res4d" type: "Eltwise" } layer { bottom: "res4d" top: "res4d" name: "res4d_relu" type: "ReLU" } layer { bottom: "res4d" top: "res4e_branch2a" name: "res4e_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4e_branch2a" top: "res4e_branch2a" name: "bn4e_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4e_branch2a" top: "res4e_branch2a" name: "scale4e_branch2a" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4e_branch2a" top: "res4e_branch2a" name: "res4e_branch2a_relu" type: "ReLU" } layer { bottom: "res4e_branch2a" top: "res4e_branch2b" name: "res4e_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4e_branch2b" top: "res4e_branch2b" name: "bn4e_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4e_branch2b" top: "res4e_branch2b" name: "scale4e_branch2b" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4e_branch2b" top: "res4e_branch2b" name: "res4e_branch2b_relu" type: "ReLU" } layer { bottom: "res4e_branch2b" top: "res4e_branch2c" name: "res4e_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4e_branch2c" top: "res4e_branch2c" name: "bn4e_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4e_branch2c" top: "res4e_branch2c" name: "scale4e_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4d" bottom: "res4e_branch2c" top: "res4e" name: "res4e" type: "Eltwise" } layer { bottom: "res4e" top: "res4e" name: "res4e_relu" type: "ReLU" } layer { bottom: "res4e" top: "res4f_branch2a" name: "res4f_branch2a" type: "Convolution" convolution_param { num_output: 256 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4f_branch2a" top: "res4f_branch2a" name: "bn4f_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4f_branch2a" top: "res4f_branch2a" name: "scale4f_branch2a" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4f_branch2a" top: "res4f_branch2a" name: "res4f_branch2a_relu" type: "ReLU" } layer { bottom: "res4f_branch2a" top: "res4f_branch2b" name: "res4f_branch2b" type: "Convolution" convolution_param { num_output: 256 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res4f_branch2b" top: "res4f_branch2b" name: "bn4f_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res4f_branch2b" top: "res4f_branch2b" name: "scale4f_branch2b" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4f_branch2b" top: "res4f_branch2b" name: "res4f_branch2b_relu" type: "ReLU" } layer { bottom: "res4f_branch2b" top: "res4f_branch2c" name: "res4f_branch2c" type: "Convolution" convolution_param { num_output: 1024 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res4f_branch2c" top: "res4f_branch2c" name: "bn4f_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: 
"res4f_branch2c" top: "res4f_branch2c" name: "scale4f_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4e" bottom: "res4f_branch2c" top: "res4f" name: "res4f" type: "Eltwise" } layer { bottom: "res4f" top: "res4f" name: "res4f_relu" type: "ReLU" } layer { bottom: "res4f" top: "res5a_branch1" name: "res5a_branch1" type: "Convolution" convolution_param { num_output: 2048 kernel_size: 1 pad: 0 stride: 2 bias_term: false } } layer { bottom: "res5a_branch1" top: "res5a_branch1" name: "bn5a_branch1" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res5a_branch1" top: "res5a_branch1" name: "scale5a_branch1" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res4f" top: "res5a_branch2a" name: "res5a_branch2a" type: "Convolution" convolution_param { num_output: 512 kernel_size: 1 pad: 0 stride: 2 bias_term: false } } layer { bottom: "res5a_branch2a" top: "res5a_branch2a" name: "bn5a_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res5a_branch2a" top: "res5a_branch2a" name: "scale5a_branch2a" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res5a_branch2a" top: "res5a_branch2a" name: "res5a_branch2a_relu" type: "ReLU" } layer { bottom: "res5a_branch2a" top: "res5a_branch2b" name: "res5a_branch2b" type: "Convolution" convolution_param { num_output: 512 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res5a_branch2b" top: "res5a_branch2b" name: "bn5a_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res5a_branch2b" top: "res5a_branch2b" name: "scale5a_branch2b" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res5a_branch2b" top: "res5a_branch2b" name: "res5a_branch2b_relu" type: "ReLU" } layer { bottom: "res5a_branch2b" top: "res5a_branch2c" name: "res5a_branch2c" type: "Convolution" convolution_param { num_output: 2048 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res5a_branch2c" top: "res5a_branch2c" name: "bn5a_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res5a_branch2c" top: "res5a_branch2c" name: "scale5a_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res5a_branch1" bottom: "res5a_branch2c" top: "res5a" name: "res5a" type: "Eltwise" } layer { bottom: "res5a" top: "res5a" name: "res5a_relu" type: "ReLU" } layer { bottom: "res5a" top: "res5b_branch2a" name: "res5b_branch2a" type: "Convolution" convolution_param { num_output: 512 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res5b_branch2a" top: "res5b_branch2a" name: "bn5b_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res5b_branch2a" top: "res5b_branch2a" name: "scale5b_branch2a" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res5b_branch2a" top: "res5b_branch2a" name: "res5b_branch2a_relu" type: "ReLU" } layer { bottom: "res5b_branch2a" top: "res5b_branch2b" name: "res5b_branch2b" type: "Convolution" convolution_param { num_output: 512 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res5b_branch2b" top: "res5b_branch2b" name: "bn5b_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res5b_branch2b" top: "res5b_branch2b" name: "scale5b_branch2b" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res5b_branch2b" top: "res5b_branch2b" name: "res5b_branch2b_relu" type: "ReLU" } layer { 
bottom: "res5b_branch2b" top: "res5b_branch2c" name: "res5b_branch2c" type: "Convolution" convolution_param { num_output: 2048 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res5b_branch2c" top: "res5b_branch2c" name: "bn5b_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res5b_branch2c" top: "res5b_branch2c" name: "scale5b_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res5a" bottom: "res5b_branch2c" top: "res5b" name: "res5b" type: "Eltwise" } layer { bottom: "res5b" top: "res5b" name: "res5b_relu" type: "ReLU" } layer { bottom: "res5b" top: "res5c_branch2a" name: "res5c_branch2a" type: "Convolution" convolution_param { num_output: 512 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res5c_branch2a" top: "res5c_branch2a" name: "bn5c_branch2a" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res5c_branch2a" top: "res5c_branch2a" name: "scale5c_branch2a" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res5c_branch2a" top: "res5c_branch2a" name: "res5c_branch2a_relu" type: "ReLU" } layer { bottom: "res5c_branch2a" top: "res5c_branch2b" name: "res5c_branch2b" type: "Convolution" convolution_param { num_output: 512 kernel_size: 3 pad: 1 stride: 1 bias_term: false } } layer { bottom: "res5c_branch2b" top: "res5c_branch2b" name: "bn5c_branch2b" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res5c_branch2b" top: "res5c_branch2b" name: "scale5c_branch2b" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res5c_branch2b" top: "res5c_branch2b" name: "res5c_branch2b_relu" type: "ReLU" } layer { bottom: "res5c_branch2b" top: "res5c_branch2c" name: "res5c_branch2c" type: "Convolution" convolution_param { num_output: 2048 kernel_size: 1 pad: 0 stride: 1 bias_term: false } } layer { bottom: "res5c_branch2c" top: "res5c_branch2c" name: "bn5c_branch2c" type: "BatchNorm" batch_norm_param { use_global_stats: true } } layer { bottom: "res5c_branch2c" top: "res5c_branch2c" name: "scale5c_branch2c" type: "Scale" scale_param { bias_term: true } } layer { bottom: "res5b" bottom: "res5c_branch2c" top: "res5c" name: "res5c" type: "Eltwise" } layer { bottom: "res5c" top: "res5c" name: "res5c_relu" type: "ReLU" } layer { bottom: "res5c" top: "pool5" name: "pool5" type: "Pooling" pooling_param { kernel_size: 7 stride: 1 pool: AVE } } layer { bottom: "pool5" top: "fc1000" name: "fc1000" type: "InnerProduct" inner_product_param { num_output: 1000 } } layer { bottom: "fc1000" top: "prob" name: "prob" type: "Softmax" } ================================================ FILE: models/mps_builder ================================================ #!/bin/bash -e # # Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of NVIDIA CORPORATION nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. 
================================================
FILE: models/mps_builder
================================================
#!/bin/bash -e
#
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

cleanup() {
    kill $(jobs -p) ||:
    echo quit | nvidia-cuda-mps-control > /dev/null 2>&1 ||:
}
trap "cleanup" EXIT SIGINT SIGTERM

active_sms=${1:-100}
echo "Setting Active SM Percentage: ${active_sms}"
export CUDA_MPS_ACTIVE_THREAD_PERCENTAGE=${active_sms}

nvidia-cuda-mps-control -d ||:
sleep 1

echo
echo "Starting a new shell with MPS running..."
bash --rcfile <(echo "PS1='MPS Subshell: '")
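The same flow can also be driven programmatically rather than via a subshell. A minimal sketch (CUDA_MPS_ACTIVE_THREAD_PERCENTAGE is the real MPS environment variable used by the script above; worker.py is a hypothetical client):

import os
import subprocess

def run_under_mps(worker_cmd, active_sm_pct=50):
    # Limit MPS clients to a fraction of the GPU's SMs, as mps_builder does.
    env = dict(os.environ, CUDA_MPS_ACTIVE_THREAD_PERCENTAGE=str(active_sm_pct))
    subprocess.run(["nvidia-cuda-mps-control", "-d"], env=env, check=False)
    try:
        subprocess.run(worker_cmd, env=env, check=True)
    finally:
        # Mirror the script's cleanup(): tell the control daemon to quit.
        subprocess.run("echo quit | nvidia-cuda-mps-control",
                       shell=True, check=False)

run_under_mps(["python3", "worker.py"], active_sm_pct=50)  # worker.py is hypothetical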
================================================
FILE: models/onnx/common.py
================================================
import os
import argparse
import numpy as np
import pycuda.driver as cuda
import tensorrt as trt

try:
    # Sometimes python2 does not understand FileNotFoundError
    FileNotFoundError
except NameError:
    FileNotFoundError = IOError

def GiB(val):
    return val * 1 << 30

def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=[]):
    '''
    Parses sample arguments.
    Args:
        description (str): Description of the sample.
        subfolder (str): The subfolder containing data relevant to this sample.
        find_files (list): A list of filenames to find. Each filename will be replaced with an absolute path.
    Returns:
        str: Path of data directory.
    Raises:
        FileNotFoundError
    '''
    kDEFAULT_DATA_ROOT = os.path.abspath("/usr/src/tensorrt/data")
    parser = argparse.ArgumentParser(description=description)
    # Standard command-line arguments for all samples.
    parser.add_argument("-d", "--datadir", help="Location of the TensorRT sample data directory.")
    args, unknown_args = parser.parse_known_args()
    # If data directory is not specified, use the default.
    data_root = args.datadir if args.datadir else kDEFAULT_DATA_ROOT
    data_path = os.path.join(data_root, subfolder) if subfolder else data_root
    # Make sure data directory exists.
    if not (os.path.exists(data_path)):
        raise FileNotFoundError(data_path + " does not exist. Please provide the correct data path with the -d option.")
    # Find all requested files.
    for index, f in enumerate(find_files):
        find_files[index] = os.path.abspath(os.path.join(data_path, f))
        if not os.path.exists(find_files[index]):
            raise FileNotFoundError(find_files[index] + " does not exist. Please provide the correct data path with the -d option.")
    if find_files:
        return data_path, find_files
    else:
        return data_path

# Simple helper data class that's a little nicer to use than a 2-tuple.
class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()

# Allocates all buffers required for an engine, i.e. host/device inputs/outputs.
def allocate_buffers(engine):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding))
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream

# This function is generalized for multiple inputs/outputs.
# inputs and outputs are expected to be lists of HostDeviceMem objects.
def do_inference(context, bindings, inputs, outputs, stream):
    # Transfer input data to the GPU.
    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
    # Run inference.
    context.execute_async(bindings=bindings, stream_handle=stream.handle)
    # Transfer predictions back from the GPU.
    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
    # Synchronize the stream
    stream.synchronize()
    # Return only the host outputs.
    return [out.host for out in outputs]
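For reference, a minimal sketch of how these helpers are strung together; it assumes a serialized engine such as the /tmp/mnist-v1.3.engine file that the notebooks later in this dump build with trtexec:

import numpy as np
import pycuda.autoinit  # noqa: F401 -- creates and cleans up the CUDA context
import tensorrt as trt

import common

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

with open("/tmp/mnist-v1.3.engine", "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())

inputs, outputs, bindings, stream = common.allocate_buffers(engine)
with engine.create_execution_context() as context:
    # Fill the page-locked input buffer, then run one async pass.
    np.copyto(inputs[0].host, np.random.random_sample(inputs[0].host.shape))
    [result] = common.do_inference(context, bindings, inputs, outputs, stream)
    print(result.shape)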
""" # make X at least 2d y = np.atleast_2d(X) # find axis if axis is None: axis = next(j[0] for j in enumerate(y.shape) if j[1] > 1) # multiply y against the theta parameter, y = y * float(theta) # subtract the max for numerical stability y = y - np.expand_dims(np.max(y, axis = axis), axis) # exponentiate y y = np.exp(y) # take the sum along the specified axis ax_sum = np.expand_dims(np.sum(y, axis = axis), axis) # finally: divide elementwise p = y / ax_sum # flatten if X was 1D if len(X.shape) == 1: p = p.flatten() return p class ModelData(object): MODEL_PATH = "/work/models/flowers-152.onnx" INPUT_SHAPE = (3, 224, 224) # We can convert TensorRT data types to numpy types with trt.nptype() DTYPE = trt.float32 # You can set the logger severity higher to suppress messages (or lower to display more messages). TRT_LOGGER = trt.Logger(trt.Logger.WARNING) # Allocate host and device buffers, and create a stream. def allocate_buffers(engine): # Determine dimensions and create page-locked memory buffers (i.e. won't be swapped to disk) to hold host inputs/outputs. h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(ModelData.DTYPE)) h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(ModelData.DTYPE)) # Allocate device memory for inputs and outputs. d_input = cuda.mem_alloc(h_input.nbytes) d_output = cuda.mem_alloc(h_output.nbytes) # Create a stream in which to copy inputs/outputs and run inference. stream = cuda.Stream() return h_input, d_input, h_output, d_output, stream def do_inference(context, h_input, d_input, h_output, d_output, stream): # Transfer input data to the GPU. cuda.memcpy_htod_async(d_input, h_input, stream) # Run inference. context.execute_async(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle) # Transfer predictions back from the GPU. cuda.memcpy_dtoh_async(h_output, d_output, stream) # Synchronize the stream stream.synchronize() # The Onnx path is used for Onnx models. def build_engine_onnx(model_file, calibrator=None): with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser: builder.max_workspace_size = common.GiB(1) builder.max_batch_size = 8 precision = "fp32" if calibrator: builder.int8_mode = True builder.int8_calibrator = calibrator precision = "int8" else: builder.fp16_mode = True precision = "fp16" # Load the Onnx model and parse it in order to populate the TensorRT network. with open(model_file, 'rb') as model: parser.parse(model.read()) engine = builder.build_cuda_engine(network) serialized = engine.serialize() with open("/work/models/flowers-152-b{}-{}.engine".format(builder.max_batch_size, precision), "wb") as file: file.write(serialized) return engine def normalize_image(image_name): image = Image.open(image_name) # Resize, antialias and transpose the image to CHW. c, h, w = ModelData.INPUT_SHAPE image_arr = np.asarray(image.resize((w, h), Image.ANTIALIAS)).transpose([2, 0, 1]).astype(trt.nptype(ModelData.DTYPE)).ravel() # This particular ResNet50 model requires some preprocessing, specifically, mean normalization. return ((image_arr / 255.0) - 0.5) * 2.0 def load_normalized_test_case(test_image, pagelocked_buffer): # Normalize the image and copy to pagelocked memory. 
class ModelData(object):
    MODEL_PATH = "/work/models/flowers-152.onnx"
    INPUT_SHAPE = (3, 224, 224)
    # We can convert TensorRT data types to numpy types with trt.nptype()
    DTYPE = trt.float32

# You can set the logger severity higher to suppress messages (or lower to display more messages).
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

# Allocate host and device buffers, and create a stream.
def allocate_buffers(engine):
    # Determine dimensions and create page-locked memory buffers (i.e. won't be swapped to disk) to hold host inputs/outputs.
    h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(ModelData.DTYPE))
    h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(ModelData.DTYPE))
    # Allocate device memory for inputs and outputs.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    # Create a stream in which to copy inputs/outputs and run inference.
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream

def do_inference(context, h_input, d_input, h_output, d_output, stream):
    # Transfer input data to the GPU.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    # Run inference.
    context.execute_async(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
    # Transfer predictions back from the GPU.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    # Synchronize the stream
    stream.synchronize()

# The Onnx path is used for Onnx models.
def build_engine_onnx(model_file, calibrator=None):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = common.GiB(1)
        builder.max_batch_size = 8
        precision = "fp32"
        if calibrator:
            builder.int8_mode = True
            builder.int8_calibrator = calibrator
            precision = "int8"
        else:
            builder.fp16_mode = True
            precision = "fp16"
        # Load the Onnx model and parse it in order to populate the TensorRT network.
        with open(model_file, 'rb') as model:
            parser.parse(model.read())
        engine = builder.build_cuda_engine(network)
        serialized = engine.serialize()
        with open("/work/models/flowers-152-b{}-{}.engine".format(builder.max_batch_size, precision), "wb") as file:
            file.write(serialized)
        return engine

def normalize_image(image_name):
    image = Image.open(image_name)
    # Resize, antialias and transpose the image to CHW.
    c, h, w = ModelData.INPUT_SHAPE
    image_arr = np.asarray(image.resize((w, h), Image.ANTIALIAS)).transpose([2, 0, 1]).astype(trt.nptype(ModelData.DTYPE)).ravel()
    # This particular ResNet50 model requires some preprocessing, specifically, mean normalization.
    return ((image_arr / 255.0) - 0.5) * 2.0

def load_normalized_test_case(test_image, pagelocked_buffer):
    # Normalize the image and copy to pagelocked memory.
    np.copyto(pagelocked_buffer, normalize_image(test_image))
    return test_image

def create_calibration_dataset():
    jpegs = []
    for dirpath, subdirs, files in os.walk("/work/models/flowers-data/flowers"):
        for f in files:
            if f.endswith("jpg"):
                jpegs.append(os.path.join(dirpath, f))
    random.shuffle(jpegs)
    return jpegs[:200]

class ImageBatchStream:
    def __init__(self, batch_size, calibration_files):
        c, h, w = ModelData.INPUT_SHAPE
        self.batch_size = batch_size
        self.files = calibration_files
        self.batch = 0
        self.max_batches = (len(calibration_files) // batch_size) + \
                           (1 if (len(calibration_files) % batch_size) else 0)
        self.calibration_data = np.zeros((batch_size, c, h, w), dtype=np.float32)

    def reset(self):
        self.batch = 0

    def next_batch(self):
        c, h, w = ModelData.INPUT_SHAPE
        if self.batch < self.max_batches:
            imgs = []
            files_for_batch = self.files[self.batch_size * self.batch : \
                                         self.batch_size * (self.batch + 1)]
            for f in files_for_batch:
                print("[ImageBatchStream] Processing ", f)
                img = normalize_image(f)
                imgs.append(img.reshape((c, h, w)))
            for i in range(len(imgs)):
                self.calibration_data[i] = imgs[i]
            self.batch += 1
            return np.ascontiguousarray(self.calibration_data, dtype=np.float32)
        else:
            return np.array([])

class MyEntropyCalibrator(trt.IInt8EntropyCalibrator):
    def __init__(self, stream):
        trt.IInt8EntropyCalibrator.__init__(self)
        self.batchstream = stream
        self.d_input = cuda.mem_alloc(self.batchstream.calibration_data.nbytes)
        stream.reset()

    def get_batch_size(self):
        return self.batchstream.batch_size

    def get_batch(self, bindings, names):
        batch = self.batchstream.next_batch()
        if not batch.size:
            return None
        cuda.memcpy_htod(self.d_input, batch)
        bindings[0] = int(self.d_input)
        return bindings

    def read_calibration_cache(self, length):
        return None

    def write_calibration_cache(self, ptr, size):
        # cache = ctypes.c_char_p(int(ptr))
        # with open('calibration_cache.bin', 'wb') as f:
        #     f.write(cache.value)
        return None
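# Sketch of the calibration handshake: during build_cuda_engine() with
# int8_mode set, TensorRT drives the calibrator itself -- it calls
# get_batch_size() once and then get_batch() repeatedly until it returns
# None, running each batch through the network to collect activation
# histograms. Roughly:
#
#   stream = ImageBatchStream(8, create_calibration_dataset())
#   calib = MyEntropyCalibrator(stream)
#   while calib.get_batch(bindings=[0], names=["data"]) is not None:
#       pass  # 200 files at batch size 8 -> 25 calibration batches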
def main():
    calibration_files = create_calibration_dataset()
    batch_stream = ImageBatchStream(8, calibration_files)
    int8_calibrator = None
    int8_calibrator = MyEntropyCalibrator(batch_stream)
    engine = build_engine_onnx("/work/models/flowers-152.onnx", calibrator=int8_calibrator)
    # serialized = engine.serialize()
    # with open("/work/models/flowers-152-b8-int8.engine", "wb") as file:
    #     file.write(serialized)
    # h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
    # with engine.create_execution_context() as context:
    #     for test_image in ["/work/models/flowers-data/test/image_07927.jpg",
    #                        "/work/models/flowers-data/test/image_06969.jpg",]:
    #         #test_image = "/work/models/flowers-data/test/image_07927.jpg" # 13 - blanket flower
    #         #test_image = "/work/models/flowers-data/test/image_06969.jpg" # 0 - alpine sea holly
    #         test_case = load_normalized_test_case(test_image, h_input)
    #         do_inference(context, h_input, d_input, h_output, d_output, stream)
    #         # We use the highest probability as our prediction. Its index corresponds to the predicted label.
    #         pred = np.argmax(h_output)
    #         score = softmax(h_output)[pred]
    #         print("Recognized " + test_case + " as " + str(pred) + " score: " + str(score))

def old_main():
    # Set the data path to the directory that contains the trained models and test images for inference.
    data_path, data_files = common.find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=["binoculars.jpeg", "reflex_camera.jpeg", "tabby_tiger_cat.jpg", ModelData.MODEL_PATH, "class_labels.txt"])
    # Get test images, models and labels.
    test_images = data_files[0:3]
    onnx_model_file, labels_file = data_files[3:]
    labels = open(labels_file, 'r').read().split('\n')
    # Build a TensorRT engine.
    with build_engine_onnx(onnx_model_file) as engine:
        # Inference is the same regardless of which parser is used to build the engine, since the model architecture is the same.
        # Allocate buffers and create a CUDA stream.
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        # Contexts are used to perform inference.
        with engine.create_execution_context() as context:
            # Load a normalized test case into the host input page-locked buffer.
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, h_input)
            # Run the engine. The output will be a 1D tensor of length 1000, where each value represents the
            # probability that the image corresponds to that label
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            # We use the highest probability as our prediction. Its index corresponds to the predicted label.
            pred = np.argmax(h_output)
            print("Recognized " + test_case + " as " + labels[pred])

if __name__ == '__main__':
    main()

================================================
FILE: models/setup.py
================================================
#!/usr/bin/env python3
#
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
import os
import subprocess

models = [
    ("ResNet-50-deploy.prototxt", "prob"),
    # ("ResNet-152-deploy.prototxt", "prob"),
]

precisions = [
    # ("fp32", ""),
    ("fp16", "--fp16"),
    # ("int8", "--int8")
]

def main():
    for model, o in models:
        for name, p in precisions:
            for b in [1, 8]:  #, 2, 4, 8]:
                n = "b{}-{}".format(b, name)
                e = model.replace("prototxt", "engine")
                e = e.replace("deploy", n)
                # Resolve the engine path against the models dir (not the cwd)
                # so the notebooks find it at /work/models/.
                e = os.path.join("/work/models", e)
                m = os.path.join("/work/models", model)
                if os.path.isfile(e):
                    continue
                subprocess.call("trtexec --deploy={} --batch={} --output={} {} --engine={}".format(
                    m, b, o, p, e), shell=True)

if __name__ == "__main__":
    main()
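On its first pass the loop above shells out to `trtexec --deploy=/work/models/ResNet-50-deploy.prototxt --batch=1 --output=prob --fp16 --engine=/work/models/ResNet-50-b1-fp16.engine`, producing one of the engine files the notebooks below register. A minimal sketch for sanity-checking such an engine, assuming it was built as above:

import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

with open("/work/models/ResNet-50-b1-fp16.engine", "rb") as f, \
        trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())

assert engine.max_batch_size == 1
for i in range(engine.num_bindings):
    # e.g. "data (3, 224, 224)" and "prob (1000, 1, 1)"
    print(engine.get_binding_name(i), engine.get_binding_shape(i))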
================================================
FILE: notebooks/Demo Day 1.ipynb
================================================
{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "import time\n", "import numpy as np\n", "import wurlitzer\n", "\n", "import trtlab\n", "import infer_test_utils as utils\n", "\n", "# this allows us to capture stdout and stderr from the backend c++ infer-runtime\n", "display_output = wurlitzer.sys_pipes" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "inputs = utils.load_inputs(\"/work/models/onnx/mnist-v1.3/test_data_set_0\")\n", "expected = utils.load_outputs(\"/work/models/onnx/mnist-v1.3/test_data_set_0\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!trtexec --onnx=/work/models/onnx/mnist-v1.3/model.onnx --saveEngine=/tmp/mnist-v1.3.engine" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Local Inference Setup" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "with display_output():\n", " manager = trtlab.InferenceManager(max_exec_concurrency=2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "with display_output():\n", " manager.register_tensorrt_engine(\"mnist\", \"/tmp/mnist-v1.3.engine\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "with display_output():\n", " manager.update_resources()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Local Inference Properties" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "mnist = manager.infer_runner(\"mnist\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "mnist.input_bindings()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "mnist.output_bindings()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Local Inference Compute" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "futures = [mnist.infer(Input3=input) for input in inputs]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# free to do other work while inference is being computed" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "results = [f.get() for f in futures]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for r, e in zip(results, expected):\n", " for key, val in r.items():\n", " r = val.reshape((1,10))\n", " np.testing.assert_almost_equal(r, e, decimal=3)\n", " print(\"Test Passed\")\n", " print(\"Result: {}\".format(np.argmax(utils.softmax(r))))\n", "\n",
"utils.mnist_image(inputs[0]).show()\n", "expected[0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: notebooks/Demo Day 2.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "import time\n", "import numpy as np\n", "import wurlitzer\n", "\n", "import trtlab\n", "import infer_test_utils as utils\n", "\n", "# this allows us to capture stdout and stderr from the backend c++ infer-runtime\n", "display_output = wurlitzer.sys_pipes" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "inputs = utils.load_inputs(\"/work/models/onnx/mnist-v1.3/test_data_set_0\")\n", "expected = utils.load_outputs(\"/work/models/onnx/mnist-v1.3/test_data_set_0\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Remote Inference Setup" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "with display_output():\n", " manager = trtlab.RemoteInferenceManager(hostname=\"localhost:50052\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "with display_output():\n", " models = manager.get_models()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "with display_output():\n", " print(models)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Remote Inference Properties" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "mnist = manager.infer_runner(\"mnist\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "mnist.input_bindings()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "mnist.output_bindings()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Remote Inference Compute" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "futures = [mnist.infer(Input3=input) for input in inputs]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# free to do other work while inference is being computed" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "results = [f.get() for f in futures]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for r, e in zip(results, expected):\n", " for key, val in r.items():\n", " r = val.reshape((1,10))\n", " np.testing.assert_almost_equal(r, e, decimal=3)\n", " print(\"Test Passed\")\n", " print(\"Output Binding Name: {}; shape: {}\".format(key, val.shape))\n", " print(\"Result: {}\".format(np.argmax(utils.softmax(r))))\n", " \n", "utils.mnist_image(inputs[0]).show()\n", "expected[0]" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, 
"file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: notebooks/Demo Day 3.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "import time\n", "import numpy as np\n", "import wurlitzer\n", "\n", "import trtlab\n", "import infer_test_utils as utils\n", "\n", "# this allows us to capture stdout and stderr from the backend c++ infer-runtime\n", "display_output = wurlitzer.sys_pipes" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!trtexec --onnx=/work/models/onnx/mnist-v1.3/model.onnx --saveEngine=/tmp/mnist-v1.3.engine" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Remote Inference Setup" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "with display_output():\n", " manager = trtlab.InferenceManager(max_exec_concurrency=2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "with display_output():\n", " manager.register_tensorrt_engine(\"mnist\", \"/tmp/mnist-v1.3.engine\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "with display_output():\n", " manager.update_resources()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Remote Inference Server" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "with display_output():\n", " manager.serve()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: notebooks/Multiple Models.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import time\n", "import numpy as np\n", "import wurlitzer\n", "\n", "import trtlab\n", "\n", "# this allows us to capture stdout and stderr from the backend c++ infer-runtime\n", "display_output = wurlitzer.sys_pipes" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "!/work/models/setup.py" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Local Inference Setup" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING: Logging before InitGoogleLogging() is written to STDERR\n", "I0204 22:01:27.543411 925 inference_manager.cc:64] -- Initialzing TensorRT Resource Manager --\n", "I0204 22:01:27.543426 925 inference_manager.cc:65] Maximum Execution Concurrency: 4\n", "I0204 22:01:27.543429 925 inference_manager.cc:66] Maximum Copy Concurrency: 8\n" ] } ], "source": [ "with display_output():\n", " manager = trtlab.InferenceManager(max_exec_concurrency=4)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "I0204 22:01:30.164453 925 
model.cc:91] Binding: data; isInput: true; dtype size: 4; bytes per batch item: 602112\n", "I0204 22:01:30.164479 925 model.cc:91] Binding: prob; isInput: false; dtype size: 4; bytes per batch item: 4000\n", "I0204 22:01:30.169529 925 inference_manager.cc:149] -- Registering Model: rn50-b1 --\n", "I0204 22:01:30.169546 925 inference_manager.cc:150] Input/Output Tensors require 591.9 KiB\n", "I0204 22:01:30.169550 925 inference_manager.cc:151] Execution Activations require 5.7 MiB\n", "I0204 22:01:30.169554 925 inference_manager.cc:155] Weights require 75.8 MiB\n", "I0204 22:01:30.223752 925 model.cc:91] Binding: data; isInput: true; dtype size: 4; bytes per batch item: 602112\n", "I0204 22:01:30.223776 925 model.cc:91] Binding: prob; isInput: false; dtype size: 4; bytes per batch item: 4000\n", "I0204 22:01:30.227011 925 inference_manager.cc:149] -- Registering Model: rn50-b8 --\n", "I0204 22:01:30.227035 925 inference_manager.cc:150] Input/Output Tensors require 4.6 MiB\n", "I0204 22:01:30.227041 925 inference_manager.cc:151] Execution Activations require 39.8 MiB\n", "I0204 22:01:30.227046 925 inference_manager.cc:155] Weights require 49.0 MiB\n" ] } ], "source": [ "with display_output():\n", " manager.register_tensorrt_engine(\"rn50-b1\", \"/work/models/ResNet-50-b1-fp16.engine\")\n", " manager.register_tensorrt_engine(\"rn50-b8\", \"/work/models/ResNet-50-b8-fp16.engine\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "I0204 22:01:31.025523 925 inference_manager.cc:194] -- Allocating TensorRT Resources --\n", "I0204 22:01:31.025539 925 inference_manager.cc:195] Creating 4 TensorRT execution tokens.\n", "I0204 22:01:31.025542 925 inference_manager.cc:196] Creating a Pool of 8 Host/Device Memory Stacks\n", "I0204 22:01:31.025550 925 inference_manager.cc:197] Each Host Stack contains 4.7 MiB\n", "I0204 22:01:31.025554 925 inference_manager.cc:198] Each Device Stack contains 4.8 MiB\n", "I0204 22:01:31.025559 925 inference_manager.cc:199] Total GPU Memory: 197.5 MiB\n" ] } ], "source": [ "with display_output():\n", " manager.update_resources()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Local Inference Properties" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "b1 = manager.infer_runner(\"rn50-b1\")\n", "b8 = manager.infer_runner(\"rn50-b8\")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'data': {'dtype': dtype('float32'), 'shape': [3, 224, 224]}}" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "b1.input_bindings()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "b1.max_batch_size()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'data': {'dtype': dtype('float32'), 'shape': [3, 224, 224]}}" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "b8.input_bindings()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "b8.max_batch_size()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ 
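"# helper defined for this notebook (not part of trtlab): the full input shape\n", "# at max batch size is [max_batch_size, *binding_shape] for the named input binding\n",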
"def max_batch_size_shape(x, input='data'):\n", " batch = [x.max_batch_size()]\n", " batch.extend(x.input_bindings()[input]['shape'])\n", " return batch" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[8, 3, 224, 224]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "max_batch_size_shape(b8)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Compute\n", "\n", "Here we launch two async inferences with two different TensorRT engines, one built for batch1, the other for batch8. While these are the same ResNet-50 models, they could be any two unique TensorRT engines.\n", "\n", "Note: for this example the weights of the model and the input tensors are all random values." ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "futures = [model.infer(data=np.random.random_sample(max_batch_size_shape(model))) for model in [b1, b8]]" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "# free to do other work while inference is being computed" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "results = [f.get() for f in futures]" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "prob binding has shape: (1, 1000, 1, 1)\n", "prob binding has shape: (8, 1000, 1, 1)\n" ] } ], "source": [ "for result in results:\n", " for output, tensor in result.items():\n", " print(\"{} binding has shape: {}\".format(output, tensor.shape))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: notebooks/Quickstart.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# TensorRT Runtime\n", "\n", "This example walks through the basic usecase of:\n", " 1. initialization the infer-runtime\n", " 2. loading a model\n", " 3. allocating resources\n", " 4. inspecting the input/output bindings of the model\n", " 5. evaluating the model using async futures\n", " 6. testing for correctness" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "import time\n", "import numpy as np\n", "import wurlitzer\n", "\n", "import trtlab\n", "import infer_test_utils as utils\n", "\n", "# this allows us to capture stdout and stderr from the backend c++ infer-runtime\n", "display_output = wurlitzer.sys_pipes" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!trtexec --onnx=/work/models/onnx/mnist-v1.3/model.onnx --saveEngine=/work/models/onnx/mnist-v1.3/mnist-v1.3.engine" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1. Initialize infer-runtime\n", "\n", "The most important option when initializing the infer-runtime is to set the maximum number of conncurrent executions that can be executed at any given time. This value is tunable for your application. 
Lower settings reduce latency; higher settings increase throughput. Evaluate how your model performs using ...TODO-this-notebook..." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "with display_output():\n", " models = trtlab.InferenceManager(max_executions=2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2. Register a Model\n", "\n", "To register a model, simply associate a `model_name` with a path to a TensorRT engine file. The returned object is an `InferRunner` object. Use an `InferRunner` to submit work to the backend inference queue." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "with display_output():\n", " mnist = models.register_tensorrt_engine(\"mnist\", \"/work/models/onnx/mnist-v1.3/mnist-v1.3.engine\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 3. Allocate Resources\n", "\n", "Before you can submit inference requests, you need to allocate some internal resources. This should be done any time new models are registered. There may be a runtime performance interruption if you update the resources while the queue is full." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "with display_output():\n", " models.update_resources()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 4. Inspect Model\n", "\n", "Query the `InferRunner` to see what it expects for inputs and what it will return for outputs." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "mnist.input_bindings()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "mnist.output_bindings()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 5. Submit Infer Requests\n", "\n", "`InferRunner.infer` accepts a dict of numpy arrays that match the input description, submits the inference request to the backend compute engine, and returns a future to a dict of numpy arrays. \n", "\n", "This means the method returns almost immediately; however, that does not mean the inference is complete. Use `get()` to wait for the result. This is a blocking call." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "result = mnist.infer(Input3=np.random.random_sample([1,28,28]))\n", "result # result is a future" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "result = result.get()\n", "result # result is the value of the future - dict of np arrays" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "with display_output():\n", " start = time.process_time()\n", " result = mnist.infer(**{k: np.random.random_sample(v['shape']) for k,v in mnist.input_bindings().items()})\n", " print(\"Queue Time: {}\".format(time.process_time() - start))\n", " result = result.get()\n", " print(\"Compute Time: {}\".format(time.process_time() - start))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 6. Test for Correctness\n", "\n", "Load test image and results. [Thanks to the ONNX Model Zoo](https://github.com/onnx/models/tree/master/mnist) for this example."
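, "\n", "Before running the test cells below, here is the whole flow from this notebook condensed into a single standalone script; this is a sketch that reuses only the calls demonstrated above (same engine path and helper module):\n", "\n", "```python\n", "import numpy as np\n", "import trtlab\n", "import infer_test_utils as utils\n", "\n", "models = trtlab.InferenceManager(max_executions=2)\n", "mnist = models.register_tensorrt_engine(\"mnist\", \"/work/models/onnx/mnist-v1.3/mnist-v1.3.engine\")\n", "models.update_resources()\n", "\n", "inputs = utils.load_inputs(\"/work/models/onnx/mnist-v1.3/test_data_set_0\")\n", "expected = utils.load_outputs(\"/work/models/onnx/mnist-v1.3/test_data_set_0\")\n", "\n", "# queue all requests first; each call returns a future immediately\n", "futures = [mnist.infer(Input3=x) for x in inputs]\n", "# then block on each future to collect the results\n", "results = [f.get() for f in futures]\n", "\n", "for r, e in zip(results, expected):\n", "    for name, val in r.items():\n", "        np.testing.assert_almost_equal(val.reshape(1, 10), e, decimal=3)\n", "```"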
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "inputs = utils.load_inputs(\"/work/models/onnx/mnist-v1.3/test_data_set_0\")\n", "expected = utils.load_outputs(\"/work/models/onnx/mnist-v1.3/test_data_set_0\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "utils.mnist_image(inputs[0]).show()\n", "expected[0]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "Submit the images to the inference queue, then wait for each result to be returned." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "results = [mnist.infer(Input3=input) for input in inputs]\n", "results = [r.get() for r in results]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Check results.\n", "TODO - update the utils to return dictionaries instead of arrays" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for r, e in zip(results, expected):\n", " for key, val in r.items():\n", " r = val.reshape((1,10))\n", " np.testing.assert_almost_equal(r, e, decimal=3)\n", " print(\"Test Passed\")\n", " print(\"Output Binding Name: {}; shape: {}\".format(key, val.shape))\n", " print(\"Result: {}\".format(np.argmax(utils.softmax(r))))\n", " # r # show the raw tensor" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: notebooks/README.md ================================================ # Jupyter Notebooks Note: Many of the ONNX examples will fail until TensorRT 5.1 is available in the container. 
This includes: - Demo Day 1 - Demo Day 2 - Demo Day 3 ================================================ FILE: requirements.txt ================================================ appdirs==1.4.3 atomicwrites==1.2.1 attrs==18.2.0 backcall==0.1.0 bleach>=3.1.1 boto3==1.9.109 botocore==1.12.109 click==6.7 cmake==3.11.0 cycler==0.10.0 decorator==4.3.0 defusedxml==0.5.0 docutils==0.14 entrypoints==0.3 grpcio==1.16.1 ipykernel==5.1.0 ipython==7.3.0 ipython-genutils==0.2.0 jedi==0.13.3 Jinja2==2.10.1 jmespath==0.9.4 jsonschema==3.0.1 jupyter-client==5.2.4 jupyter-core==4.4.0 jupyterlab==0.35.4 jupyterlab-server==0.2.0 kiwisolver==1.0.1 Mako==1.0.7 MarkupSafe==1.0 matplotlib==3.0.2 mistune==0.8.4 more-itertools==4.3.0 mxnet==1.4.0.post0 nbconvert==5.4.1 nbformat==4.4.0 notebook==5.7.8 numpy==1.15.4 onnx==1.3.0 pandocfilters==1.4.2 parso==0.5.0 pathlib2==2.3.3 pexpect==4.6.0 pickleshare==0.7.5 Pillow>=6.2.2 pluggy==0.8.0 prometheus-client==0.6.0 prompt-toolkit==2.0.9 protobuf==3.7.0 ptyprocess==0.6.0 py==1.7.0 pycuda==2018.1.1 Pygments==2.3.1 pyparsing==2.3.1 pyrsistent==0.14.11 pytest==4.0.2 python-dateutil==2.8.0 pytools==2018.5.2 pyzmq==18.0.0 s3transfer==0.2.0 Send2Trash==1.5.0 six==1.12.0 terminado==0.8.1 testpath==0.4.2 tornado==5.1 traitlets==4.3.2 typing==3.6.6 typing-extensions==3.7.2 urllib3==1.24.2 wcwidth==0.1.7 webencodings==0.5.1 wurlitzer==1.0.2 ================================================ FILE: trtlab/BUILD.bazel ================================================ exports_files([ "core", "cuda", "nvrpc", "tensorrt", ], visibility = ["//visibility:public"], ) ================================================ FILE: trtlab/CMakeLists.txt ================================================ #cmake_minimum_required(VERSION 3.9 FATAL_ERROR) #project(trtlab) #include(GNUInstallDirs) #option(ENABLE_TESTING "Build tests" ON) #set(CMAKE_CXX_STANDARD 17) #set(CMAKE_CXX_EXTENSIONS ON) #set(CMAKE_DEBUG_POSTFIX "-d") set(CMAKE_POSITION_INDEPENDENT_CODE TRUE) #set(default_build_type "Release") #set(CMAKE_THREAD_PREFER_PTHREAD TRUE) #find_package(Threads) #if(ENABLE_TESTING) # message(STATUS "Building Tests") # find_package(GTest) #1.8.1 CONFIG REQUIRED) # find_package(benchmark) # enable_testing() #endif() # suppress warnings #add_compile_options( # $<$,CXX>,$>:-Wno-deprecated-declarations> #) if(BUILD_MEMORY) add_subdirectory(memory) endif() if(BUILD_CORE) add_subdirectory(core) endif() if(BUILD_CUDA) add_subdirectory(cuda) endif() if(BUILD_NVRPC) add_subdirectory(nvrpc) endif() if(BUILD_TENSORRT) add_subdirectory(tensorrt) endif() if(BUILD_PYTHON) add_subdirectory(pybind) endif() # install # include(CMakePackageConfigHelpers) # # install( # EXPORT ${PROJECT_NAME}-targets # NAMESPACE ${PROJECT_NAME}:: # FILE ${PROJECT_NAME}-targets.cmake # DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" # ) # # configure_package_config_file( # "${CMAKE_CURRENT_SOURCE_DIR}/cmake/${PROJECT_NAME}-config.cmake.in" # ${PROJECT_NAME}-config.cmake # INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" # NO_CHECK_REQUIRED_COMPONENTS_MACRO # PATH_VARS CMAKE_INSTALL_INCLUDEDIR # ) # # install( # # FILES "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config.cmake" # DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" # ) ================================================ FILE: trtlab/core/BUILD.bazel ================================================ cc_library( name = "core", srcs = glob([ "src/**/*.cc", "src/**/*.h", ]), hdrs = glob( ["include/**/*.h"], ), deps = [ "@com_google_glog//:glog", ], 
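# headers live under include/trtlab/core/...; stripping the leading "include"
# lets dependents write #include "trtlab/core/<header>.h"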
strip_include_prefix = "include", visibility = ["//visibility:public"], ) ================================================ FILE: trtlab/core/CMakeLists.txt ================================================ include(GNUInstallDirs) set(CMAKE_THREAD_PREFER_PTHREAD TRUE) find_package(Threads) find_package(cpuaff) find_package(glog 0.3.5 REQUIRED) find_package(dlpack) find_package(Boost REQUIRED COMPONENTS fiber_numa fiber context) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS ON) # when linking against static glog one also needs to link against gflags; # which version of gflags is required depends on the build type find_package(gflags REQUIRED) if( CMAKE_BUILD_TYPE STREQUAL "Debug" ) find_package(gflags COMPONENTS nothreads_shared) set(trtlab_gflags "gflags_nothreads_shared") message(STATUS "gflags: ${trtlab_gflags}") message(STATUS "build_type: ${CMAKE_BUILD_TYPE}") else() find_package(gflags COMPONENTS nothreads_static) set(trtlab_gflags "gflags_nothreads_static") message(STATUS "gflags: ${trtlab_gflags}") message(STATUS "build_type: ${CMAKE_BUILD_TYPE}") endif() set(header_path ${CMAKE_CURRENT_SOURCE_DIR}/include/trtlab/core) add_library(core # src/types.cc src/affinity.cc src/utils.cc src/cyclic_buffer.cc src/cyclic_windowed_buffer.cc ) add_library(${PROJECT_NAME}::core ALIAS core) message(STATUS "jemalloc: ${JEMALLOC_STATIC_LIBRARIES}") target_link_libraries(core PUBLIC trtlab_memory dlpack::dlpack Threads::Threads $<$,$>>:rt> glog::glog ${trtlab_gflags} Boost::fiber Boost::fiber_numa Boost::context ${JEMALLOC_STATIC_LIBRARIES} dl ) target_include_directories(core PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> PRIVATE ${header_path} ) set_target_properties(core PROPERTIES OUTPUT_NAME ${PROJECT_NAME}_core) install( TARGETS core EXPORT ${PROJECT_NAME}-targets RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} ) install( DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} ) #if(ENABLE_TESTING) add_subdirectory(tests) #endif() #if(benchmark_FOUND) add_subdirectory(benchmarks) #endif() ================================================ FILE: trtlab/core/benchmarks/CMakeLists.txt ================================================ # Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of NVIDIA CORPORATION nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. find_package(benchmark) add_executable(bench_core main.cc bench_pool.cc bench_thread_pool.cc bench_batcher.cc # bench_memory.cc # bench_memory_stack.cc ) target_link_libraries(bench_core PRIVATE ${PROJECT_NAME}::core benchmark::benchmark ) add_test(NAME bench_core COMMAND $<TARGET_FILE:bench_core>) ================================================ FILE: trtlab/core/benchmarks/bench_batcher.cc ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ #include <chrono> #include <cstdint> #include <functional> #include <future> #include <memory> #include <queue> #include <benchmark/benchmark.h> #include "trtlab/core/batcher.h" #include "trtlab/core/dispatcher.h" #include "trtlab/core/task_pool.h" #include "trtlab/core/thread_pool.h" using namespace trtlab; static void batcher_standard_batcher_int(benchmark::State& state) { StandardBatcher<std::size_t> batcher(state.range(0)); std::size_t counter = 0; for (auto _ : state) { auto future = batcher.enqueue(++counter); auto batch = batcher.update(); if (batch) { batch->promise.set_value(); future.wait(); } } state.SetItemsProcessed(static_cast<std::int64_t>(state.iterations())); } struct audio_state { const std::uint16_t* data; std::size_t size; std::shared_ptr<void> state; }; static void batcher_standard_batcher_audio(benchmark::State& state) { StandardBatcher<audio_state> batcher(state.range(0)); std::size_t counter = 0; for (auto _ : state) { auto future = batcher.enqueue({nullptr, 0ul, nullptr}); auto batch = batcher.update(); if (batch) { batch->promise.set_value(); future.wait(); } } state.SetItemsProcessed(static_cast<std::int64_t>(state.iterations())); } static void batcher_engine(benchmark::State& state) { const std::size_t batch_size = state.range(0); auto execute_on_batch = [](const std::vector<std::size_t>& batch, std::function<void()> free_inputs) { free_inputs(); }; auto thread_pool = std::make_shared<ThreadPool>(1); auto task_pool = std::make_shared<TaskPool>(); StandardBatcher<std::size_t> batcher(batch_size); Dispatcher dispatcher(std::move(batcher), std::chrono::milliseconds(15), thread_pool, task_pool, execute_on_batch); std::queue<std::shared_future<void>> f; int pre_load = 3; for (int i = 0; i < pre_load; i++) { f.push(dispatcher.enqueue(0)); } for (int i = 0; i < (batch_size - 1) * pre_load; i++) { dispatcher.enqueue(i); } for (auto _ : state) { f.push(dispatcher.enqueue(0)); for (int i = 0; i < batch_size - 1; i++) { dispatcher.enqueue(i); } f.front().wait(); f.pop(); } while (!f.empty()) { f.front().wait(); f.pop(); } state.SetItemsProcessed(static_cast<std::int64_t>(state.iterations()) * state.range(0)); } BENCHMARK(batcher_standard_batcher_int)->RangeMultiplier(2)->Range(1, 1 << 7); BENCHMARK(batcher_standard_batcher_audio)->RangeMultiplier(2)->Range(1 << 6, 1 << 7); BENCHMARK(batcher_engine)->RangeMultiplier(2)->Range(4, 1 << 6)->UseRealTime()->MinTime(3.0); ================================================ FILE: trtlab/core/benchmarks/bench_memory.cc ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include <memory> #include <benchmark/benchmark.h> #include "trtlab/core/memory/allocator.h" #include "trtlab/core/memory/malloc.h" #include "trtlab/core/memory/sysv_allocator.h" using namespace trtlab; static void BM_Memory_SystemMalloc(benchmark::State& state) { for(auto _ : state) { //auto unique = std::make_unique<Allocator<Malloc>>(1024 * 1024); //auto shared = std::make_shared<Allocator<Malloc>>(1024 * 1024); Allocator<Malloc> memory1(1024 * 1024); Allocator<Malloc> memory2(1024 * 1024); Allocator<Malloc> memory3(1024 * 1024); } } static void BM_Memory_SystemV_descriptor(benchmark::State& state) { auto master = std::make_unique<Allocator<SystemV>>(1024 * 1024); for(auto _ : state) { auto mdesc = SystemV::Attach(master->ShmID()); } } /* static void BM_Memory_HostDescriptor(benchmark::State& state) { void *ptr = (void*)0xDEADBEEF; mem_size_t size = 1337; for(auto _ : state) { nextgen::HostDescriptor hdesc(ptr, size, []{}); } } */ /* static void BM_Memory_SharedHostDescriptor(benchmark::State& state) { void *ptr = (void*)0xDEADBEEF; mem_size_t size = 1337; for(auto _ : state) { nextgen::Descriptor hdesc(ptr, size, []{}); auto shared = std::make_shared>(std::move(hdesc)); } } */ /* static void BM_Memory_NextGenMalloc(benchmark::State& state) { static mem_size_t one_mb = 1024*1024; for(auto _ : state) { auto hdesc0 = nextgen::Malloc::Allocate(one_mb); auto hdesc1 = nextgen::Malloc::Allocate(one_mb); } } */ BENCHMARK(BM_Memory_SystemMalloc); BENCHMARK(BM_Memory_SystemV_descriptor); // BENCHMARK(BM_Memory_HostDescriptor); // BENCHMARK(BM_Memory_SharedHostDescriptor); // BENCHMARK(BM_Memory_NextGenMalloc); ================================================ FILE: trtlab/core/benchmarks/bench_memory_stack.cc ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include <vector> #include <benchmark/benchmark.h> #include "trtlab/core/hybrid_mutex.h" #include "trtlab/core/memory.h" using namespace trtlab; static void allocators_transactional_raw(benchmark::State& state) { using namespace memory::literals; auto alloc = memory::make_allocator_adapter(memory::malloc_allocator()); auto block_alloc = memory::trtlab::make_growth_capped_block_allocator(1_MiB, 4u, std::move(alloc)); auto trans_alloc = memory::trtlab::make_transactional_allocator(std::move(block_alloc)); trans_alloc.reserve_blocks(4u); for(auto _ : state) { for(int i=0; i < state.range(0); i++) { auto ptr = trans_alloc.allocate_node(1024, 64); trans_alloc.deallocate_node(ptr, 0u, 0u); } } } static void allocators_transactional_std(benchmark::State& state) { using namespace memory::literals; auto alloc = memory::make_allocator_adapter(memory::malloc_allocator()); auto block_alloc = memory::trtlab::make_growth_capped_block_allocator(1_MiB, 4u, std::move(alloc)); auto trans_alloc = memory::trtlab::make_transactional_allocator(std::move(block_alloc)); trans_alloc.reserve_blocks(4u); auto smart = memory::trtlab::make_allocator(std::move(trans_alloc)); for(auto _ : state) { for(int i=0; i < state.range(0); i++) { auto ptr = smart.allocate_node(1024, 64); smart.deallocate_node(ptr, 0u, 0u); } } } static void allocators_transactional_md(benchmark::State& state) { using namespace memory::literals; auto alloc = memory::make_allocator_adapter(memory::malloc_allocator()); auto block_alloc = memory::trtlab::make_growth_capped_block_allocator(1_MiB, 4u, std::move(alloc)); auto trans_alloc = memory::trtlab::make_transactional_allocator(std::move(block_alloc)); trans_alloc.reserve_blocks(4u); auto smart = memory::trtlab::make_allocator(std::move(trans_alloc)); for(auto _ : state) { for(int i=0; i < state.range(0); i++) { auto md = smart.allocate_descriptor(1024, 64); } } } #if 0 template using custom_vector = std::vector>; template auto make_vector(RawAllocator& alloc) { using std_allocator = memory::std_allocator; return std::vector(std_allocator(alloc)); } static void BM_vector_transactional(benchmark::State& state) { using namespace memory::literals; auto malloc = memory::make_allocator_reference(memory::MallocAllocator()); auto block_alloc = memory::trtlab::make_growth_capped_block_allocator(128_MiB, 8, std::move(malloc)); auto trans_alloc = memory::trtlab::make_transactional_allocator(std::move(block_alloc)); trans_alloc.reserve_blocks(8); for(auto _ : state) { auto vec = make_vector(trans_alloc); vec.reserve(1024*1024*8); } } static void BM_vector_smart_transactional(benchmark::State& state) { using namespace memory::literals; auto malloc = memory::make_allocator_reference(memory::MallocAllocator()); auto block_alloc = memory::trtlab::make_growth_capped_block_allocator(128_MiB, 8, std::move(malloc)); auto trans_alloc = memory::trtlab::make_transactional_allocator(std::move(block_alloc)); trans_alloc.reserve_blocks(8); auto smart = memory::trtlab::make_allocator(std::move(trans_alloc));
for(auto _ : state) { auto vec = memory::trtlab::make_vector(smart); vec.reserve(1024*1024*8); } } static void BM_CyclicAllocator_stl_allocator(benchmark::State& state) { { auto v0 = custom_vector(1024); } for(auto _ : state) { custom_vector vector; vector.reserve(1024*1024*8); } } static void BM_CyclicAllocator_stl_allocator2(benchmark::State& state) { size_t ctr = 1024; for(auto _ : state) { custom_vector v3; v3.reserve(ctr*ctr*8); } } static void BM_vector_default(benchmark::State& state) { size_t ctr = 1024; for(auto _ : state) { std::vector vec; vec.reserve(1024*1024*8); } } static void BM_stl_allocator_ctor(benchmark::State& state) { for(auto _ : state) { auto a = stl::temporary_allocator(); } } static void BM_stl_allocator_allocate_lifecycle(benchmark::State& state) { for(auto _ : state) { auto a = stl::temporary_allocator(); auto i = a.allocate(1024); a.deallocate(i, 1024); } } #endif BENCHMARK(allocators_transactional_raw)->RangeMultiplier(2)->Range(1, 1 << 2); BENCHMARK(allocators_transactional_std)->RangeMultiplier(2)->Range(1, 1 << 0); BENCHMARK(allocators_transactional_md)->RangeMultiplier(2)->Range(1, 1 << 0); ================================================ FILE: trtlab/core/benchmarks/bench_pool.cc ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "trtlab/core/pool.h" #include "trtlab/core/userspace_threads.h" #include static void BM_Pool_v1_Pop(benchmark::State& state) { using trtlab::v1::Pool; struct Object { }; auto pool = Pool::Create(); pool->EmplacePush(new Object); for(auto _ : state) { auto obj = pool->Pop(); } } static void BM_Pool_v2_Pop(benchmark::State& state) { using trtlab::v2::Pool; struct Object { }; auto pool = Pool::Create(); pool->EmplacePush(); for(auto _ : state) { auto obj = pool->Pop(); } } static void BM_Pool_v3_Pop(benchmark::State& state) { using trtlab::v3::Pool; struct Object { }; auto pool = Pool::Create(); pool->emplace_push(); for(auto _ : state) { auto obj = std::move(pool->pop()); } } static void BM_Pool_v4_Pop(benchmark::State& state) { using trtlab::v4::Pool; struct Object { }; auto pool = Pool::Create(); pool->EmplacePush(); for(auto _ : state) { auto obj = std::move(pool->pop_unique()); } } static void BM_Pool_v4_Pop_Shared(benchmark::State& state) { using trtlab::v4::Pool; struct Object { }; auto pool = Pool::Create(); pool->emplace_push(); for(auto _ : state) { auto obj = std::move(pool->pop_shared()); } } static void BM_Pool_v3_Pop_Userspace(benchmark::State& state) { using trtlab::v3::Pool; struct Object { }; auto pool = Pool::Create(); pool->emplace_push(); for(auto _ : state) { auto obj = std::move(pool->pop()); } } BENCHMARK(BM_Pool_v1_Pop); BENCHMARK(BM_Pool_v2_Pop); BENCHMARK(BM_Pool_v3_Pop); BENCHMARK(BM_Pool_v4_Pop); BENCHMARK(BM_Pool_v4_Pop_Shared); BENCHMARK(BM_Pool_v3_Pop_Userspace); ================================================ FILE: trtlab/core/benchmarks/bench_thread_pool.cc ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "trtlab/core/hybrid_condition.h" #include "trtlab/core/hybrid_mutex.h" #include "trtlab/core/thread_pool.h" #include static void BM_ThreadPool_Enqueue(benchmark::State& state) { using trtlab::ThreadPool; auto pool = std::make_unique(1); for(auto _ : state) { CHECK(pool); // enqueue only auto future = pool->enqueue([] {}); //future.get(); } } BENCHMARK(BM_ThreadPool_Enqueue)->UseRealTime(); static void BM_HybridThreadPool_Enqueue(benchmark::State& state) { using trtlab::BaseThreadPool; auto pool = std::make_unique>(1); for(auto _ : state) { auto future = pool->enqueue([] {}); } } BENCHMARK(BM_HybridThreadPool_Enqueue)->UseRealTime(); ================================================ FILE: trtlab/core/benchmarks/main.cc ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include BENCHMARK_MAIN(); ================================================ FILE: trtlab/core/include/trtlab/core/affinity.h ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #pragma once #include #include "cpuaff/cpuaff.hpp" namespace trtlab { struct affinity; class cpu_set final : public cpuaff::cpu_set { public: using cpuaff::cpu_set::cpu_set; cpu_set get_intersection(const cpu_set& other) const; cpu_set get_union(const cpu_set& other) const; cpu_set get_difference(const cpu_set& other) const; static cpu_set from_string(std::string); std::string cpus_string() const; std::string cores_string() const; std::string sockets_string() const; auto get_allocator() const -> cpuaff::round_robin_allocator { return cpuaff::round_robin_allocator(*this); }; friend std::ostream& operator<<(std::ostream& s, const cpu_set& cpus); }; std::ostream& operator<<(std::ostream& s, const cpu_set& cpus); class affinity_guard final { // hold the original affinity of the calling thread // the original affinity will be restored on destruction cpu_set m_original_cpus; public: affinity_guard(); explicit affinity_guard(const cpu_set&); ~affinity_guard(); affinity_guard(const affinity_guard&) = delete; affinity_guard& operator=(const affinity_guard&) = delete; affinity_guard(affinity_guard&&) noexcept = delete; affinity_guard& operator=(affinity_guard&&) noexcept = delete; }; struct numa_node { unsigned id; cpu_set cpus; std::vector distances; friend std::ostream& operator<<(std::ostream& s, const numa_node& cpus); }; std::ostream& operator<<(std::ostream& s, const numa_node& cpus); struct affinity final { struct this_thread final { static cpu_set get_affinity(); static void set_affinity(const cpu_set&); }; struct system final { // static cpu_set cpus_by_numa(int numa_id); // static cpu_set cpus_by_socket(int socket_id); // static cpu_set cpus_by_core(int core_id); // static cpu_set cpus_by_hyperthread(int thread_id); static cpuaff::cpu cpu_from_logical_id(int id); static std::vector topology(); }; }; } // namespace trtlab ================================================ FILE: trtlab/core/include/trtlab/core/async_compute.h ================================================ /* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #pragma once #include #include namespace trtlab { namespace async { namespace detail { template