Copy disabled (too large)
Download .txt
Showing preview only (10,114K chars total). Download the full file to get everything.
Repository: triton-inference-server/server
Branch: main
Commit: f642e5343589
Files: 1631
Total size: 9.3 MB
Directory structure:
gitextract_5r79t389/
├── .clang-format
├── .dockerignore
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.md
│ │ └── feature_request.md
│ ├── PULL_REQUEST_TEMPLATE/
│ │ ├── pull_request_template_external_contrib.md
│ │ └── pull_request_template_internal_contrib.md
│ ├── pull_request_template.md
│ └── workflows/
│ ├── codeql.yml
│ └── pre-commit.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CITATION.cff
├── CMakeLists.txt
├── CONTRIBUTING.md
├── Dockerfile.QA
├── Dockerfile.sdk
├── Dockerfile.win10.min
├── LICENSE
├── README.md
├── SECURITY.md
├── TRITON_VERSION
├── build.py
├── compose.py
├── deploy/
│ ├── alibaba-cloud/
│ │ └── README.md
│ ├── aws/
│ │ ├── Chart.yaml
│ │ ├── README.md
│ │ ├── dashboard.json
│ │ ├── templates/
│ │ │ ├── _helpers.tpl
│ │ │ ├── deployment.yaml
│ │ │ ├── secrets.yaml
│ │ │ └── service.yaml
│ │ └── values.yaml
│ ├── fleetcommand/
│ │ ├── Chart.yaml
│ │ ├── README.md
│ │ ├── dashboard.json
│ │ ├── templates/
│ │ │ ├── _helpers.tpl
│ │ │ ├── configmap-grafana-dashboard.yaml
│ │ │ ├── deployment.yaml
│ │ │ ├── secrets.yaml
│ │ │ └── service.yaml
│ │ └── values.yaml
│ ├── gcp/
│ │ ├── Chart.yaml
│ │ ├── README.md
│ │ ├── dashboard.json
│ │ ├── templates/
│ │ │ ├── _helpers.tpl
│ │ │ ├── deployment.yaml
│ │ │ └── service.yaml
│ │ └── values.yaml
│ ├── gke-marketplace-app/
│ │ ├── README.md
│ │ ├── benchmark/
│ │ │ ├── README.md
│ │ │ ├── model-store/
│ │ │ │ ├── bert_base_tf_cpu/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── bert_base_tf_gpu/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── bert_base_trt_gpu/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── bert_base_trt_gpu_seqlen128/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── bert_distill_tf_cpu/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ └── bert_distill_tf_gpu/
│ │ │ │ └── config.pbtxt
│ │ │ └── perf-analyzer-script/
│ │ │ ├── perf_query.sh
│ │ │ └── triton_client.yaml
│ │ ├── client-sample/
│ │ │ ├── bert_request.json
│ │ │ ├── locustfile_bert.py
│ │ │ └── perf_analyzer_grpc.sh
│ │ ├── server-deployer/
│ │ │ ├── Dockerfile
│ │ │ ├── build_and_push.sh
│ │ │ ├── chart/
│ │ │ │ └── triton/
│ │ │ │ ├── Chart.yaml
│ │ │ │ ├── templates/
│ │ │ │ │ ├── _helpers.tpl
│ │ │ │ │ ├── application.yaml
│ │ │ │ │ ├── deployment.yaml
│ │ │ │ │ ├── hpa.yaml
│ │ │ │ │ ├── ingress.yaml
│ │ │ │ │ └── service.yaml
│ │ │ │ └── values.yaml
│ │ │ ├── data-test/
│ │ │ │ └── schema.yaml
│ │ │ └── schema.yaml
│ │ └── trt-engine/
│ │ └── README.md
│ ├── k8s-onprem/
│ │ ├── Chart.yaml
│ │ ├── README.md
│ │ ├── dashboard.json
│ │ ├── templates/
│ │ │ ├── _helpers.tpl
│ │ │ ├── deployment.yaml
│ │ │ ├── hpa.yaml
│ │ │ ├── ingressroute.yaml
│ │ │ ├── rbac.yaml
│ │ │ ├── service.yaml
│ │ │ └── serviceaccount.yaml
│ │ └── values.yaml
│ ├── mlflow-triton-plugin/
│ │ ├── README.md
│ │ ├── examples/
│ │ │ ├── expected_output.json
│ │ │ ├── input.json
│ │ │ └── onnx_float32_int32_int32/
│ │ │ ├── 1/
│ │ │ │ └── model.onnx
│ │ │ └── config.pbtxt
│ │ ├── mlflow_triton/
│ │ │ ├── __init__.py
│ │ │ ├── config.py
│ │ │ └── deployments.py
│ │ ├── scripts/
│ │ │ ├── publish_model_to_mlflow.py
│ │ │ └── triton_flavor.py
│ │ └── setup.py
│ └── oci/
│ ├── Chart.yaml
│ ├── README.md
│ ├── dashboard.json
│ ├── templates/
│ │ ├── _helpers.tpl
│ │ ├── deployment.yaml
│ │ ├── secrets.yaml
│ │ └── service.yaml
│ └── values.yaml
├── docker/
│ ├── README.third-party-src
│ ├── cpu_only/
│ │ ├── entrypoint.d/
│ │ │ ├── 12-banner.sh
│ │ │ └── 50-gpu-driver-check2.sh
│ │ └── nvidia_entrypoint.sh
│ ├── entrypoint.d/
│ │ ├── 10-banner.txt
│ │ ├── 15-container-copyright.txt
│ │ ├── 50-gpu-driver-check2.sh
│ │ ├── 56-network-driver-version-check.sh
│ │ ├── 70-shm-check.sh
│ │ └── 99-check-run-aip-mode.sh
│ └── sagemaker/
│ └── serve
├── docs/
│ ├── Dockerfile.docs
│ ├── Makefile
│ ├── README.md
│ ├── _reference/
│ │ └── tritonclient_api.rst
│ ├── _static/
│ │ ├── .gitattributes
│ │ ├── custom.css
│ │ └── rtd-data.js
│ ├── _templates/
│ │ └── layout.html
│ ├── backend_guide/
│ │ └── vllm.rst
│ ├── client_guide/
│ │ ├── api_reference.rst
│ │ ├── in_process.rst
│ │ ├── kserve.rst
│ │ ├── kserve_extension.rst
│ │ └── python.rst
│ ├── conf.py
│ ├── contents.rst
│ ├── customization_guide/
│ │ ├── build.md
│ │ ├── compose.md
│ │ ├── deploy.md
│ │ ├── inference_protocols.md
│ │ ├── inprocess_c_api.md
│ │ ├── inprocess_java_api.md
│ │ ├── repository_agents.md
│ │ ├── sagemaker.md
│ │ ├── test.md
│ │ └── tritonfrontend.md
│ ├── examples/
│ │ ├── README.md
│ │ ├── fetch_models.sh
│ │ ├── jetson/
│ │ │ ├── README.md
│ │ │ └── concurrency_and_dynamic_batching/
│ │ │ ├── Makefile
│ │ │ ├── README.md
│ │ │ ├── common.h
│ │ │ ├── labels.txt
│ │ │ ├── people_detection.cc
│ │ │ ├── tao/
│ │ │ │ ├── convert_peoplenet.sh
│ │ │ │ └── models/
│ │ │ │ └── peoplenet/
│ │ │ │ └── .gitkeep
│ │ │ ├── trtis_model_repo_sample_1/
│ │ │ │ └── peoplenet/
│ │ │ │ ├── 1/
│ │ │ │ │ └── .gitkeep
│ │ │ │ └── config.pbtxt
│ │ │ └── trtis_model_repo_sample_2/
│ │ │ └── peoplenet/
│ │ │ ├── 1/
│ │ │ │ └── .gitkeep
│ │ │ └── config.pbtxt
│ │ └── model_repository/
│ │ ├── densenet_onnx/
│ │ │ ├── config.pbtxt
│ │ │ └── densenet_labels.txt
│ │ ├── inception_onnx/
│ │ │ ├── config.pbtxt
│ │ │ └── inception_labels.txt
│ │ ├── simple/
│ │ │ ├── 1/
│ │ │ │ └── model.onnx
│ │ │ └── config.pbtxt
│ │ ├── simple_dyna_sequence/
│ │ │ ├── 1/
│ │ │ │ └── model.onnx
│ │ │ └── config.pbtxt
│ │ ├── simple_identity/
│ │ │ ├── 1/
│ │ │ │ └── model.py
│ │ │ └── config.pbtxt
│ │ ├── simple_int8/
│ │ │ ├── 1/
│ │ │ │ └── model.onnx
│ │ │ └── config.pbtxt
│ │ ├── simple_sequence/
│ │ │ ├── 1/
│ │ │ │ └── model.onnx
│ │ │ └── config.pbtxt
│ │ └── simple_string/
│ │ ├── 1/
│ │ │ └── model.onnx
│ │ └── config.pbtxt
│ ├── exclusions.txt
│ ├── generate_docs.py
│ ├── getting_started/
│ │ ├── llm.md
│ │ ├── quick_deployment.rst
│ │ ├── quickstart.md
│ │ └── trtllm_user_guide.md
│ ├── index.md
│ ├── introduction/
│ │ ├── compatibility.md
│ │ ├── index.md
│ │ └── release_notes.md
│ ├── llm_features/
│ │ └── speculative_decoding.rst
│ ├── perf_benchmark/
│ │ ├── genai_perf.rst
│ │ ├── model_analyzer.rst
│ │ └── perf_analyzer.rst
│ ├── protocol/
│ │ ├── README.md
│ │ ├── extension_binary_data.md
│ │ ├── extension_classification.md
│ │ ├── extension_generate.md
│ │ ├── extension_logging.md
│ │ ├── extension_model_configuration.md
│ │ ├── extension_model_repository.md
│ │ ├── extension_parameters.md
│ │ ├── extension_schedule_policy.md
│ │ ├── extension_sequence.md
│ │ ├── extension_shared_memory.md
│ │ ├── extension_statistics.md
│ │ └── extension_trace.md
│ ├── repositories.txt
│ ├── scaling_guide/
│ │ └── scaling_guide.rst
│ ├── server_guide/
│ │ ├── features.rst
│ │ ├── model_pipelines.rst
│ │ └── state_management.rst
│ └── user_guide/
│ ├── architecture.md
│ ├── batcher.md
│ ├── bls.md
│ ├── custom_operations.md
│ ├── debugging_guide.md
│ ├── decoupled_models.md
│ ├── ensemble_models.md
│ ├── faq.md
│ ├── implicit_state_management.md
│ ├── jetson.md
│ ├── metrics.md
│ ├── model_analyzer.md
│ ├── model_configuration.md
│ ├── model_execution.md
│ ├── model_management.md
│ ├── model_repository.md
│ ├── optimization.md
│ ├── perf_analyzer.md
│ ├── performance_tuning.md
│ ├── ragged_batching.md
│ ├── rate_limiter.md
│ ├── request_cancellation.md
│ ├── response_cache.md
│ ├── scheduler.md
│ ├── trace.md
│ └── v1_to_v2.md
├── enhancements/
│ ├── NNNN-template-complete.md
│ ├── NNNN-template-limited.md
│ ├── README.md
│ └── teps/
│ └── 0000-tep-process.md
├── pyproject.toml
├── python/
│ └── openai/
│ ├── README.md
│ ├── openai_frontend/
│ │ ├── __init__.py
│ │ ├── engine/
│ │ │ ├── __init__.py
│ │ │ ├── engine.py
│ │ │ ├── triton_engine.py
│ │ │ └── utils/
│ │ │ ├── __init__.py
│ │ │ ├── chat.py
│ │ │ ├── tokenizer.py
│ │ │ ├── tool_call_parsers/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── llama_tool_call_parser.py
│ │ │ │ ├── mistral_tool_call_parser.py
│ │ │ │ ├── tool_call_parser.py
│ │ │ │ └── utils.py
│ │ │ └── triton.py
│ │ ├── frontend/
│ │ │ ├── __init__.py
│ │ │ ├── fastapi/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── middleware/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── api_restriction.py
│ │ │ │ └── routers/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── chat.py
│ │ │ │ ├── completions.py
│ │ │ │ ├── embeddings.py
│ │ │ │ ├── models.py
│ │ │ │ └── observability.py
│ │ │ ├── fastapi_frontend.py
│ │ │ └── frontend.py
│ │ ├── main.py
│ │ ├── schemas/
│ │ │ ├── __init__.py
│ │ │ └── openai.py
│ │ └── utils/
│ │ └── utils.py
│ ├── requirements-test.txt
│ ├── requirements.txt
│ └── tests/
│ ├── __init__.py
│ ├── conftest.py
│ ├── test_chat_completions.py
│ ├── test_completions.py
│ ├── test_embeddings.py
│ ├── test_lora.py
│ ├── test_models/
│ │ ├── identity_py/
│ │ │ ├── 1/
│ │ │ │ └── model.py
│ │ │ └── config.pbtxt
│ │ └── mock_llm/
│ │ ├── 1/
│ │ │ └── model.py
│ │ └── config.pbtxt
│ ├── test_observability.py
│ ├── test_openai_client.py
│ ├── test_openai_restricted_apis.py
│ ├── test_tool_calling.py
│ ├── utils.py
│ ├── vllm_embedding_models/
│ │ └── all-MiniLM-L6-v2/
│ │ ├── 1/
│ │ │ └── model.json
│ │ └── config.pbtxt
│ ├── vllm_mistral_models/
│ │ └── mistral-nemo-instruct-2407/
│ │ ├── 1/
│ │ │ └── model.json
│ │ └── config.pbtxt
│ └── vllm_models/
│ └── llama-3.1-8b-instruct/
│ ├── 1/
│ │ └── model.json
│ └── config.pbtxt
├── qa/
│ ├── L0_additional_dependency_dirs/
│ │ └── test.sh
│ ├── L0_async_work_queue/
│ │ └── test.sh
│ ├── L0_backend_bls/
│ │ └── test.sh
│ ├── L0_backend_config/
│ │ └── test.sh
│ ├── L0_backend_fastertransformer/
│ │ └── test.sh
│ ├── L0_backend_identity/
│ │ ├── identity_test.py
│ │ └── test.sh
│ ├── L0_backend_onnxruntime/
│ │ ├── gen_add_bf16_onnx_model.py
│ │ ├── test.py
│ │ └── test.sh
│ ├── L0_backend_output_detail/
│ │ └── test.sh
│ ├── L0_backend_python/
│ │ ├── argument_validation/
│ │ │ ├── models/
│ │ │ │ └── argument_validation/
│ │ │ │ ├── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── config.pbtxt
│ │ │ └── test.sh
│ │ ├── async_execute/
│ │ │ ├── concurrency_test.py
│ │ │ └── test.sh
│ │ ├── bls/
│ │ │ ├── bls_parameters_test.py
│ │ │ └── test.sh
│ │ ├── common.sh
│ │ ├── custom_metrics/
│ │ │ └── test.sh
│ │ ├── decoupled/
│ │ │ ├── decoupled_test.py
│ │ │ ├── models/
│ │ │ │ ├── decoupled_bls/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.py
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── decoupled_bls_async_cancel/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.py
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── decoupled_bls_cancel/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.py
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── decoupled_bls_cancel_after_complete/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.py
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── decoupled_bls_stream/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.py
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── decoupled_execute_error/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.py
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── decoupled_raise_exception/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.py
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── decoupled_return_response_error/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.py
│ │ │ │ │ └── config.pbtxt
│ │ │ │ └── decoupled_send_after_close_error/
│ │ │ │ ├── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── config.pbtxt
│ │ │ └── test.sh
│ │ ├── ensemble/
│ │ │ ├── ensemble_test.py
│ │ │ └── test.sh
│ │ ├── env/
│ │ │ └── test.sh
│ │ ├── examples/
│ │ │ └── test.sh
│ │ ├── io/
│ │ │ ├── io_test.py
│ │ │ ├── requested_output_model/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── model.py
│ │ │ └── test.sh
│ │ ├── lifecycle/
│ │ │ ├── lifecycle_test.py
│ │ │ └── test.sh
│ │ ├── logging/
│ │ │ ├── logging_test.py
│ │ │ └── test.sh
│ │ ├── model_control/
│ │ │ ├── model_control_test.py
│ │ │ └── test.sh
│ │ ├── model_readiness/
│ │ │ ├── test.sh
│ │ │ ├── test_model_readiness.py
│ │ │ └── test_models/
│ │ │ ├── is_ready_fn_returns_true_decoupled/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── model.py
│ │ │ ├── readiness_coroutine_model.py
│ │ │ └── readiness_model.py
│ │ ├── parameters/
│ │ │ ├── response_parameters_test.py
│ │ │ └── test.sh
│ │ ├── python_based_backends/
│ │ │ ├── python_based_backends_test.py
│ │ │ └── test.sh
│ │ ├── python_test.py
│ │ ├── request_rescheduling/
│ │ │ ├── grpc_endpoint_test.py
│ │ │ └── test.sh
│ │ ├── response_sender/
│ │ │ ├── response_sender_complete_final_test.py
│ │ │ ├── response_sender_test.py
│ │ │ └── test.sh
│ │ ├── restart/
│ │ │ ├── models/
│ │ │ │ └── restart/
│ │ │ │ ├── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── config.pbtxt
│ │ │ ├── restart_test.py
│ │ │ └── test.sh
│ │ ├── setup_python_enviroment.sh
│ │ ├── test.sh
│ │ ├── test_infer_shm_leak.py
│ │ └── variants/
│ │ └── test.sh
│ ├── L0_backend_release/
│ │ └── test.sh
│ ├── L0_backend_tutorial/
│ │ └── test.sh
│ ├── L0_batch_custom/
│ │ ├── batch_custom_test.py
│ │ └── test.sh
│ ├── L0_batch_input/
│ │ ├── batch_input_test.py
│ │ └── test.sh
│ ├── L0_batcher/
│ │ ├── batcher_test.py
│ │ ├── queue_timeout_test.py
│ │ ├── test.sh
│ │ └── verify_timestamps.py
│ ├── L0_buffer_attributes/
│ │ ├── buffer_attributes_test.py
│ │ ├── models/
│ │ │ ├── bls/
│ │ │ │ ├── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── config.pbtxt
│ │ │ └── identity/
│ │ │ ├── 1/
│ │ │ │ └── model.py
│ │ │ └── config.pbtxt
│ │ └── test.sh
│ ├── L0_client_build_variants/
│ │ └── test.sh
│ ├── L0_client_java/
│ │ └── test.sh
│ ├── L0_client_memory_growth/
│ │ ├── client_memory_mail.py
│ │ ├── models/
│ │ │ └── custom_identity_int32/
│ │ │ └── config.pbtxt
│ │ └── test.sh
│ ├── L0_client_nobatch/
│ │ ├── client_test.py
│ │ └── test.sh
│ ├── L0_client_timeout/
│ │ ├── client_infer_timeout_test.py
│ │ ├── client_non_infer_timeout_test.py
│ │ ├── models/
│ │ │ └── custom_identity_int32/
│ │ │ └── config.pbtxt
│ │ └── test.sh
│ ├── L0_client_valgrind/
│ │ ├── models/
│ │ │ └── custom_identity_int32/
│ │ │ └── config.pbtxt
│ │ └── test.sh
│ ├── L0_cmdline_trace/
│ │ ├── test.sh
│ │ └── trace_client.py
│ ├── L0_compute_capability/
│ │ └── test.sh
│ ├── L0_config_json/
│ │ ├── ensemble_config.pbtxt
│ │ ├── max_priority_level.pbtxt
│ │ └── test.sh
│ ├── L0_cuda_graph/
│ │ ├── test.sh
│ │ └── trt_cuda_graph_test.py
│ ├── L0_cuda_shared_memory/
│ │ ├── cuda_shared_memory_test.py
│ │ └── test.sh
│ ├── L0_custom_model_config/
│ │ └── test.sh
│ ├── L0_custom_ops/
│ │ ├── mod_op_test.py
│ │ ├── onnx_op_test.py
│ │ ├── test.sh
│ │ └── vision_op_test.py
│ ├── L0_data_compression/
│ │ ├── test.sh
│ │ └── validation.py
│ ├── L0_decoupled/
│ │ ├── decoupled_test.py
│ │ ├── models/
│ │ │ ├── fan_repeat/
│ │ │ │ └── config.pbtxt
│ │ │ ├── identity_int32/
│ │ │ │ └── config.pbtxt
│ │ │ ├── nested_square/
│ │ │ │ └── config.pbtxt
│ │ │ ├── repeat_square/
│ │ │ │ └── config.pbtxt
│ │ │ ├── sequence_repeat/
│ │ │ │ └── config.pbtxt
│ │ │ └── simple_repeat/
│ │ │ └── config.pbtxt
│ │ └── test.sh
│ ├── L0_device_memory_tracker/
│ │ ├── test.py
│ │ └── test.sh
│ ├── L0_dlpack_multi_gpu/
│ │ └── test.sh
│ ├── L0_doc_links/
│ │ ├── mkdocs.yml
│ │ └── test.sh
│ ├── L0_dyna_implicit_state/
│ │ └── test.sh
│ ├── L0_dyna_sequence_batcher/
│ │ ├── dyna_sequence_batcher_test.py
│ │ └── test.sh
│ ├── L0_grpc/
│ │ ├── client_plugin_models/
│ │ │ └── client_plugin_test/
│ │ │ ├── 1/
│ │ │ │ └── model.py
│ │ │ └── config.pbtxt
│ │ ├── grpc_basic_auth_test.py
│ │ ├── grpc_client_plugin_test.py
│ │ ├── nginx.conf
│ │ ├── python_grpc_aio_test.py
│ │ ├── python_unit_test.py
│ │ └── test.sh
│ ├── L0_grpc_state_cleanup/
│ │ ├── cleanup_test.py
│ │ └── test.sh
│ ├── L0_http/
│ │ ├── generate_endpoint_test.py
│ │ ├── http_basic_auth_test.py
│ │ ├── http_client_plugin_test.py
│ │ ├── http_input_size_limit_test.py
│ │ ├── http_request_many_chunks.py
│ │ ├── http_restricted_api_test.py
│ │ ├── http_test.py
│ │ ├── nginx.conf
│ │ ├── python_http_aio_test.py
│ │ └── test.sh
│ ├── L0_http_fuzz/
│ │ ├── fuzztest.py
│ │ └── test.sh
│ ├── L0_https/
│ │ ├── nginx.conf
│ │ └── test.sh
│ ├── L0_implicit_state/
│ │ ├── implicit_state.py
│ │ ├── models/
│ │ │ ├── growable_memory/
│ │ │ │ └── config.pbtxt
│ │ │ ├── no_implicit_state/
│ │ │ │ └── config.pbtxt
│ │ │ ├── no_state_update/
│ │ │ │ └── config.pbtxt
│ │ │ ├── single_state_buffer/
│ │ │ │ └── config.pbtxt
│ │ │ └── wrong_internal_state/
│ │ │ └── config.pbtxt
│ │ └── test.sh
│ ├── L0_infer/
│ │ ├── infer_test.py
│ │ ├── install_and_test.sh
│ │ └── test.sh
│ ├── L0_infer_reshape/
│ │ ├── infer_reshape_test.py
│ │ └── test.sh
│ ├── L0_infer_variable/
│ │ ├── infer_variable_test.py
│ │ └── test.sh
│ ├── L0_infer_zero/
│ │ ├── infer_zero_test.py
│ │ └── test.sh
│ ├── L0_input_validation/
│ │ ├── input_validation_test.py
│ │ ├── models/
│ │ │ ├── input_all_optional/
│ │ │ │ ├── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── config.pbtxt
│ │ │ ├── input_all_required/
│ │ │ │ ├── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── config.pbtxt
│ │ │ └── input_optional/
│ │ │ ├── 1/
│ │ │ │ └── model.py
│ │ │ └── config.pbtxt
│ │ └── test.sh
│ ├── L0_io/
│ │ ├── gen_libtorch_model.py
│ │ └── test.sh
│ ├── L0_iterative_sequence/
│ │ ├── iterative_sequence_e2e.py
│ │ ├── models/
│ │ │ └── iterative_sequence/
│ │ │ └── config.pbtxt
│ │ └── test.sh
│ ├── L0_java_memory_growth/
│ │ ├── MemoryGrowthTest.java
│ │ └── test.sh
│ ├── L0_java_resnet/
│ │ ├── ResnetTest.java
│ │ ├── expected_output_data/
│ │ │ ├── expected_output_onnx.txt
│ │ │ ├── expected_output_pytorch.txt
│ │ │ └── expected_output_tensorflow.txt
│ │ └── test.sh
│ ├── L0_java_sequence_batcher/
│ │ ├── SequenceTest.java
│ │ └── test.sh
│ ├── L0_java_simple_example/
│ │ └── test.sh
│ ├── L0_json/
│ │ └── test.sh
│ ├── L0_large_payload/
│ │ ├── large_payload_test.py
│ │ └── test.sh
│ ├── L0_libtorch_disable_cudnn/
│ │ └── test.sh
│ ├── L0_libtorch_inference_mode/
│ │ └── test.sh
│ ├── L0_libtorch_instance_group_kind_model/
│ │ ├── client.py
│ │ ├── gen_models.py
│ │ ├── models/
│ │ │ └── libtorch_multi_device/
│ │ │ └── config.pbtxt
│ │ └── test.sh
│ ├── L0_libtorch_io_names/
│ │ ├── io_names_client.py
│ │ └── test.sh
│ ├── L0_libtorch_io_types/
│ │ └── test.sh
│ ├── L0_libtorch_optimized_execution/
│ │ └── test.sh
│ ├── L0_libtorch_shared_weights/
│ │ ├── libtorch_shared_weights_test.py
│ │ └── test.sh
│ ├── L0_lifecycle/
│ │ ├── ensemble_zero_1_float32/
│ │ │ └── config.pbtxt
│ │ ├── identity_zero_1_int32/
│ │ │ └── config.pbtxt
│ │ ├── lifecycle_test.py
│ │ ├── retry_model/
│ │ │ └── 1/
│ │ │ └── model.py
│ │ └── test.sh
│ ├── L0_logging/
│ │ ├── log_format_test.py
│ │ ├── logging_endpoint_test.py
│ │ └── test.sh
│ ├── L0_long_running_stress/
│ │ ├── crashing_client.py
│ │ ├── scenarios.py
│ │ ├── stress.py
│ │ ├── stress_mail.py
│ │ └── test.sh
│ ├── L0_memory/
│ │ ├── client.py
│ │ └── test.sh
│ ├── L0_memory_growth/
│ │ ├── busy_op_test.py
│ │ ├── server_memory_mail.py
│ │ └── test.sh
│ ├── L0_metrics/
│ │ ├── cpu_metrics_test.py
│ │ ├── ensemble_decoupled/
│ │ │ ├── async_execute_decouple/
│ │ │ │ ├── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── config.pbtxt
│ │ │ └── ensemble/
│ │ │ └── config.pbtxt
│ │ ├── ensemble_delay/
│ │ │ └── config.pbtxt
│ │ ├── histogram_metrics_test.py
│ │ ├── identity_delay/
│ │ │ └── config.pbtxt
│ │ ├── metrics_config_test.py
│ │ ├── metrics_queue_size_test.py
│ │ ├── model_namespacing_repos/
│ │ │ ├── addsub_repo/
│ │ │ │ ├── addsub_ensemble/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ └── composing_model/
│ │ │ │ └── 1/
│ │ │ │ └── model.py
│ │ │ └── subadd_repo/
│ │ │ ├── composing_model/
│ │ │ │ └── 1/
│ │ │ │ └── model.py
│ │ │ └── subadd_ensemble/
│ │ │ └── config.pbtxt
│ │ ├── pinned_memory_metrics_test.py
│ │ ├── test.sh
│ │ └── unit_test_models/
│ │ ├── identity_cache_off/
│ │ │ └── config.pbtxt
│ │ └── identity_cache_on/
│ │ └── config.pbtxt
│ ├── L0_mlflow/
│ │ ├── plugin_test.py
│ │ └── test.sh
│ ├── L0_model_config/
│ │ ├── autofill_noplatform/
│ │ │ ├── common/
│ │ │ │ └── no_version/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── custom/
│ │ │ │ ├── no_delimiter/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ └── unknown_backend.unknown/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── ensemble/
│ │ │ │ ├── circular_dependency/
│ │ │ │ │ ├── circular_dependency/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── circular_dependency_2/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── expected_2
│ │ │ │ ├── ensemble_scheduling_no_set/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── has_backend/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── inconsistent_data_type/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── fp32_dim1_batch2/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── inconsistent_data_type/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── int32_dim1_batch4/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── inconsistent_shape/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── expected_2
│ │ │ │ │ ├── fp32_dim1_batch4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── fp32_dim3_batch4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── inconsistent_shape/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── instance_group_set/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── invalid_batch_size/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── fp32_dim1_batch2/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── fp32_dim1_batch4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── invalid_batch_size/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── invalid_decoupled_branching/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── int32_dim1_nobatch_output2/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── invalid_decoupled_branching/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── repeat_int32/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── invalid_decoupled_branching_2/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── invalid_decoupled_branching_2/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── repeat_int32/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── invalid_input_map/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── fp32_dim1_batch4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── fp32_dim1_batch4_input4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── fp32_dim1_batch4_output3/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── invalid_input_map/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── invalid_output_map/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── fp32_dim1_batch4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── fp32_dim1_batch4_input4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── fp32_dim1_batch4_output3/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── invalid_output_map/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── model_warm_up_set/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── no_input_map/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── no_model_name/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── no_output_map/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── no_required_version/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── no_required_version/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── simple/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── no_required_version_2/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── no_required_version_2/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── simple/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── no_required_version_3/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── no_required_version_3/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── simple/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── no_step/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── no_step_2/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── non_existing_model/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── fp32_dim1_batch4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── fp32_dim1_batch4_output3/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── non_existing_model/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── optimization_set/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── output_to_tensor_overmapped/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── redundant_tensor_as_input/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── expected_2
│ │ │ │ ├── redundant_tensor_as_output/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── self_circular_dependency/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── fp32_dim1_batch4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── fp32_dim1_batch4_input4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── fp32_dim1_batch4_output3/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── self_circular_dependency/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── tensor_to_input_overmapped/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── unmapped_input/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── fp32_dim1_batch4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── fp32_dim1_batch4_input4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── fp32_dim1_batch4_output3/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── unmapped_input/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── unreachable_input/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── unreachable_output/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── unreachable_output_2/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ └── unreachable_output_3/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── onnx/
│ │ │ │ ├── bad_input_dims/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.onnx
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── bad_max_batch_size/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.onnx
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── bad_output_dims/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.onnx
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── too_few_inputs/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.onnx
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── too_many_inputs/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.onnx
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── unknown_input/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.onnx
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ └── unknown_output/
│ │ │ │ ├── 1/
│ │ │ │ │ └── model.onnx
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── openvino/
│ │ │ │ ├── bad_input_dims/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── bad_output_dims/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── too_few_inputs/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── too_many_inputs/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── unknown_input/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ └── unknown_output/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── python/
│ │ │ │ ├── conflicting_max_batch_size/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ ├── conflicting_scheduler_sequence/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ ├── input_mismatch_datatype/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── input_mismatch_dims/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── input_missing_datatype/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ ├── input_missing_dims/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ ├── input_missing_name/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ ├── input_wrong_property/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ ├── model_transaction_policy_invalid_args/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ ├── model_transaction_policy_mismatch/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ ├── no_return/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ ├── output_mismatch_datatype/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── output_mismatch_dims/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── output_missing_datatype/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ ├── output_missing_dims/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ ├── output_missing_name/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ └── output_wrong_property/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── model.py
│ │ │ ├── pytorch/
│ │ │ │ ├── too_few_inputs/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ └── too_few_outputs/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ └── tensorrt/
│ │ │ ├── bad_dynamic_shapes_max/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── bad_dynamic_shapes_min/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── bad_input_dims/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── bad_input_non_linear_format_io/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── bad_input_shape/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── bad_input_shape_tensor/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── bad_input_type/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── bad_output_dims/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── bad_output_shape/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── bad_output_shape_tensor/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── bad_output_type/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── bad_outut_non_linear_format_io/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── mixed_batch_hint_dims/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── mixed_batch_hint_shape_values/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── too_few_inputs/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── too_many_inputs/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── unknown_input/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ └── unknown_output/
│ │ │ ├── config.pbtxt
│ │ │ └── expected
│ │ ├── autofill_noplatform_success/
│ │ │ ├── custom/
│ │ │ │ ├── empty_config.identity/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ └── no_backend.identity/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── ensemble/
│ │ │ │ ├── embedded_ensemble/
│ │ │ │ │ ├── embedded_ensemble/
│ │ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ │ └── expected
│ │ │ │ │ ├── fp32_dim1_batch4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── inner_ensemble/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── inconsistent_shape/
│ │ │ │ │ ├── fp32_dim1_batch4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── fp32_dim2_nobatch/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── inconsistent_shape/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── inconsistent_shape_2/
│ │ │ │ │ ├── fp32_dim1_batch4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── fp32_dim2_nobatch/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── inconsistent_shape_2/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ └── unmapped_output/
│ │ │ │ ├── fp32_dim1_batch4_output3/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ └── unmapped_output/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── onnx/
│ │ │ │ ├── cpu_instance/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── empty_config/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.onnx
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── expected.1
│ │ │ │ │ ├── expected.2
│ │ │ │ │ └── expected.3
│ │ │ │ ├── no_config/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.onnx
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── expected.1
│ │ │ │ │ ├── expected.2
│ │ │ │ │ └── expected.3
│ │ │ │ └── no_config_no_batch/
│ │ │ │ ├── 1/
│ │ │ │ │ └── model.onnx
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ ├── expected.1
│ │ │ │ ├── expected.2
│ │ │ │ └── expected.3
│ │ │ ├── openvino/
│ │ │ │ ├── dynamic_batch/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── expected.1
│ │ │ │ │ ├── expected.2
│ │ │ │ │ └── expected.3
│ │ │ │ ├── empty_config/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── expected.1
│ │ │ │ │ ├── expected.2
│ │ │ │ │ └── expected.3
│ │ │ │ ├── no_config/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── expected.1
│ │ │ │ │ ├── expected.2
│ │ │ │ │ └── expected.3
│ │ │ │ └── partial_config/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected.1
│ │ │ ├── python/
│ │ │ │ ├── conflicting_scheduler_ensemble/
│ │ │ │ │ ├── conflicting_scheduler_ensemble/
│ │ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ │ ├── expected
│ │ │ │ │ │ └── model.py
│ │ │ │ │ ├── ensemble_first_step/
│ │ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ │ └── model.py
│ │ │ │ │ └── ensemble_second_step/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── model.py
│ │ │ │ ├── dynamic_batching/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── expected.1
│ │ │ │ │ ├── expected.2
│ │ │ │ │ ├── expected.3
│ │ │ │ │ └── model.py
│ │ │ │ ├── dynamic_batching_no_op/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── expected.1
│ │ │ │ │ ├── expected.2
│ │ │ │ │ ├── expected.3
│ │ │ │ │ └── model.py
│ │ │ │ ├── empty_config/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── expected.1
│ │ │ │ │ ├── expected.2
│ │ │ │ │ └── expected.3
│ │ │ │ ├── incomplete_input/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ ├── incomplete_output/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── model_transaction_policy/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── expected.1
│ │ │ │ │ ├── expected.2
│ │ │ │ │ ├── expected.3
│ │ │ │ │ └── model.py
│ │ │ │ ├── model_transaction_policy_decoupled_false/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── expected.1
│ │ │ │ │ ├── expected.2
│ │ │ │ │ ├── expected.3
│ │ │ │ │ └── model.py
│ │ │ │ ├── model_transaction_policy_no_op/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── expected.1
│ │ │ │ │ ├── expected.2
│ │ │ │ │ ├── expected.3
│ │ │ │ │ └── model.py
│ │ │ │ ├── optional_input/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ ├── unknown_input/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ └── unknown_output/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── pytorch/
│ │ │ │ ├── cpu_instance/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ └── no_name_platform/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ └── tensorrt/
│ │ │ ├── empty_config/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── empty_config_variable/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── hint_for_no_batch/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── incomplete_input/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ ├── expected.1
│ │ │ │ ├── expected.2
│ │ │ │ └── expected.3
│ │ │ ├── incomplete_output/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ ├── expected.1
│ │ │ │ ├── expected.2
│ │ │ │ └── expected.3
│ │ │ ├── multi_prof_max_bs/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── no_config/
│ │ │ │ └── expected
│ │ │ ├── no_config_non_linear_format_io/
│ │ │ │ └── expected
│ │ │ ├── no_config_shape_tensor/
│ │ │ │ └── expected
│ │ │ ├── no_config_variable/
│ │ │ │ └── expected
│ │ │ ├── no_name_platform/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── no_name_platform_variable/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ └── reshape_config_provided/
│ │ │ ├── config.pbtxt
│ │ │ └── expected
│ │ ├── cli_messages/
│ │ │ ├── cli_deprecation/
│ │ │ │ └── expected
│ │ │ └── cli_override/
│ │ │ └── expected
│ │ ├── compare_status.py
│ │ ├── custom_parameters/
│ │ │ └── tensorrt/
│ │ │ ├── invalid/
│ │ │ │ └── allocation_strategy_invalid_value/
│ │ │ │ ├── expected
│ │ │ │ └── partial.pbtxt
│ │ │ └── valid/
│ │ │ ├── allocation_strategy_no_key/
│ │ │ │ └── partial.pbtxt
│ │ │ ├── allocation_strategy_no_parameters/
│ │ │ │ └── partial.pbtxt
│ │ │ ├── allocation_strategy_value_1/
│ │ │ │ ├── expected
│ │ │ │ └── partial.pbtxt
│ │ │ └── allocation_strategy_value_2/
│ │ │ ├── expected
│ │ │ └── partial.pbtxt
│ │ ├── model_metrics/
│ │ │ ├── invalid_config/
│ │ │ │ ├── empty_buckets/
│ │ │ │ │ ├── expected
│ │ │ │ │ └── partial.pbtxt
│ │ │ │ ├── empty_metric_family/
│ │ │ │ │ ├── expected
│ │ │ │ │ └── partial.pbtxt
│ │ │ │ ├── no_buckets/
│ │ │ │ │ ├── expected
│ │ │ │ │ └── partial.pbtxt
│ │ │ │ ├── no_histogram_options/
│ │ │ │ │ ├── expected
│ │ │ │ │ └── partial.pbtxt
│ │ │ │ ├── no_metric_family/
│ │ │ │ │ ├── expected
│ │ │ │ │ └── partial.pbtxt
│ │ │ │ └── no_metric_identifier/
│ │ │ │ ├── expected
│ │ │ │ └── partial.pbtxt
│ │ │ ├── valid_config/
│ │ │ │ └── valid_model_metrics/
│ │ │ │ └── partial.pbtxt
│ │ │ └── valid_config_with_warn/
│ │ │ └── unknown_metric_family/
│ │ │ ├── expected
│ │ │ └── partial.pbtxt
│ │ ├── noautofill_platform/
│ │ │ ├── batch_input_less_source0/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── batch_input_less_source1/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── batch_input_less_source2/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── batch_input_less_source3/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── batch_input_many_source0/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── batch_input_many_source1/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── batch_input_many_source2/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── batch_input_many_source3/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── batch_input_unknown_source/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── batch_output_duplicated_target/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── batch_output_less_source/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── batch_output_many_source/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── batch_output_unknown_source/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── batch_output_unknown_target/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── control_kind_end_multiple/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── control_kind_ready_multiple/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── control_kind_start_multiple/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── control_tensor_multiple/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── control_tensor_no_value/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── default_priority_level0/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── default_priority_level1/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── ensemble_scheduling_set/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── invalid_cpu/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── invalid_gpu/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── missing_datatype/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── negative_gpu/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── negative_max_batch_size/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── preserve_ordering0/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── preserve_ordering1/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── preserve_ordering2/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── priority_level0/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── priority_level1/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_elementcount0/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_elementcount1/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_elementcount2/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_elementcount3/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_nobatch_empty0/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_nobatch_empty1/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_nobatch_variable0/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_nobatch_variable1/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_nobatch_variable2/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_nobatch_variable3/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_nobatch_variable4/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_nobatch_variable5/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_nobatch_zerodims0/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_nobatch_zerodims1/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_variable0/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_variable1/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_variable2/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_variable3/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_variable4/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_variable5/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_zerodims0/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_zerodims1/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── zerodims_input0/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── zerodims_input1/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── zerodims_output0/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ └── zerodims_output1/
│ │ │ ├── config.pbtxt
│ │ │ ├── expected
│ │ │ └── expected_ensemble
│ │ ├── special_cases/
│ │ │ ├── invalid_platform/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── invalid_runtime/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ └── runtime_escape/
│ │ │ ├── config.pbtxt
│ │ │ └── expected
│ │ └── test.sh
│ ├── L0_model_namespacing/
│ │ ├── python_addsub/
│ │ │ └── __init__.py
│ │ ├── python_subadd/
│ │ │ └── __init__.py
│ │ ├── test.py
│ │ ├── test.sh
│ │ ├── test_duplication/
│ │ │ ├── addsub_repo/
│ │ │ │ ├── composing_model/
│ │ │ │ │ └── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── simple_addsub/
│ │ │ │ └── config.pbtxt
│ │ │ └── subadd_repo/
│ │ │ ├── composing_model/
│ │ │ │ └── 1/
│ │ │ │ └── model.py
│ │ │ └── simple_subadd/
│ │ │ └── config.pbtxt
│ │ ├── test_dynamic_resolution/
│ │ │ ├── addsub_repo/
│ │ │ │ ├── composing_model/
│ │ │ │ │ └── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── simple_addsub/
│ │ │ │ └── config.pbtxt
│ │ │ └── subadd_repo/
│ │ │ ├── composing_model/
│ │ │ │ └── 1/
│ │ │ │ └── model.py
│ │ │ └── simple_subadd/
│ │ │ └── config.pbtxt
│ │ ├── test_ensemble_duplication/
│ │ │ ├── addsub_repo/
│ │ │ │ ├── composing_addsub/
│ │ │ │ │ └── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── simple_ensemble/
│ │ │ │ └── config.pbtxt
│ │ │ └── subadd_repo/
│ │ │ ├── composing_subadd/
│ │ │ │ └── 1/
│ │ │ │ └── model.py
│ │ │ └── simple_ensemble/
│ │ │ └── config.pbtxt
│ │ └── test_no_duplication/
│ │ ├── addsub_repo/
│ │ │ ├── composing_addsub/
│ │ │ │ └── 1/
│ │ │ │ └── model.py
│ │ │ └── simple_addsub/
│ │ │ └── config.pbtxt
│ │ └── subadd_repo/
│ │ ├── composing_subadd/
│ │ │ └── 1/
│ │ │ └── model.py
│ │ └── simple_subadd/
│ │ └── config.pbtxt
│ ├── L0_model_queue/
│ │ ├── ensemble_zero_1_float32/
│ │ │ └── config.pbtxt
│ │ ├── model_queue_test.py
│ │ └── test.sh
│ ├── L0_model_update/
│ │ ├── instance_update_test.py
│ │ └── test.sh
│ ├── L0_multi_server/
│ │ └── test.sh
│ ├── L0_nan_inf/
│ │ ├── models/
│ │ │ └── nan_inf_output/
│ │ │ ├── 1/
│ │ │ │ └── model.py
│ │ │ └── config.pbtxt
│ │ ├── nan_inf_test.py
│ │ └── test.sh
│ ├── L0_nullchar_string/
│ │ ├── nullchar_string_client.py
│ │ └── test.sh
│ ├── L0_onnx_optimization/
│ │ └── test.sh
│ ├── L0_openai/
│ │ ├── generate_engine.py
│ │ └── test.sh
│ ├── L0_optional_input/
│ │ ├── models/
│ │ │ ├── ensemble_identity_2_float32/
│ │ │ │ └── config.pbtxt
│ │ │ ├── identity_2_float32/
│ │ │ │ └── config.pbtxt
│ │ │ ├── optional_connecting_tensor/
│ │ │ │ └── config.pbtxt
│ │ │ ├── optional_identity/
│ │ │ │ ├── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── config.pbtxt
│ │ │ └── pipeline_identity_2_float32/
│ │ │ └── config.pbtxt
│ │ ├── optional_input_test.py
│ │ └── test.sh
│ ├── L0_orca/
│ │ ├── orca_http_test.py
│ │ └── test.sh
│ ├── L0_output_name/
│ │ ├── output_name_test.py
│ │ └── test.sh
│ ├── L0_output_validation/
│ │ ├── lt_op_val_client.py
│ │ └── test.sh
│ ├── L0_parallel_copy/
│ │ ├── parallel_copy_test.py
│ │ └── test.sh
│ ├── L0_parameters/
│ │ ├── class_count_test.py
│ │ ├── model_repository/
│ │ │ ├── ensemble/
│ │ │ │ └── config.pbtxt
│ │ │ ├── identity/
│ │ │ │ └── config.pbtxt
│ │ │ └── parameter/
│ │ │ └── 1/
│ │ │ └── model.py
│ │ ├── parameters_test.py
│ │ └── test.sh
│ ├── L0_passive_instance/
│ │ ├── models/
│ │ │ └── distributed_int32_int32_int32/
│ │ │ └── config.pbtxt
│ │ ├── passive_instance_test.py
│ │ └── test.sh
│ ├── L0_perf_deeprecommender/
│ │ ├── run_test.sh
│ │ └── test.sh
│ ├── L0_perf_kaldi/
│ │ ├── create_data.sh
│ │ └── test.sh
│ ├── L0_perf_nomodel/
│ │ ├── custom_models/
│ │ │ └── custom_zero_1_float32/
│ │ │ └── config.pbtxt
│ │ ├── run_test.sh
│ │ └── test.sh
│ ├── L0_perf_pyclients/
│ │ ├── custom_models/
│ │ │ └── custom_zero_1_int32/
│ │ │ └── config.pbtxt
│ │ ├── simple_perf_client.py
│ │ └── test.sh
│ ├── L0_perf_resnet/
│ │ ├── run_test.sh
│ │ └── test.sh
│ ├── L0_perf_tensorrt_llm/
│ │ └── test.sh
│ ├── L0_perf_vllm/
│ │ └── test.sh
│ ├── L0_pinned_memory/
│ │ ├── libtorch_ensemble.pbtxt
│ │ └── test.sh
│ ├── L0_priority/
│ │ └── test.sh
│ ├── L0_python_api/
│ │ ├── test.sh
│ │ ├── test_kserve.py
│ │ ├── test_model_repository/
│ │ │ ├── delayed_identity/
│ │ │ │ ├── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── config.pbtxt
│ │ │ └── identity/
│ │ │ ├── 1/
│ │ │ │ └── model.py
│ │ │ └── config.pbtxt
│ │ └── testing_utils.py
│ ├── L0_python_client_unit_tests/
│ │ └── test.sh
│ ├── L0_pytorch_python_runtime/
│ │ ├── infer.py
│ │ ├── test.sh
│ │ └── unit_test.py
│ ├── L0_query/
│ │ ├── models/
│ │ │ └── query/
│ │ │ └── config.pbtxt
│ │ ├── query_e2e.py
│ │ └── test.sh
│ ├── L0_rate_limiter/
│ │ ├── rate_limiter_test.py
│ │ └── test.sh
│ ├── L0_register/
│ │ ├── config.pbtxt
│ │ └── test.sh
│ ├── L0_repoagent_checksum/
│ │ ├── identity_test.py
│ │ ├── models/
│ │ │ └── identity_int32/
│ │ │ ├── config.pbtxt
│ │ │ └── data_file
│ │ └── test.sh
│ ├── L0_request_cancellation/
│ │ ├── grpc_cancellation_test.py
│ │ ├── implicit_state_model/
│ │ │ ├── config.pbtxt
│ │ │ ├── gen_model.py
│ │ │ └── model.pt
│ │ ├── implicit_state_test.py
│ │ ├── scheduler_test.py
│ │ └── test.sh
│ ├── L0_response_cache/
│ │ ├── ensemble_cache_test.py
│ │ ├── generate_random_data.py
│ │ ├── models/
│ │ │ ├── decoupled_cache/
│ │ │ │ └── config.pbtxt
│ │ │ └── identity_cache/
│ │ │ └── config.pbtxt
│ │ └── test.sh
│ ├── L0_response_statistics/
│ │ ├── response_statistics_test.py
│ │ └── test.sh
│ ├── L0_sagemaker/
│ │ ├── sagemaker_generate_stream_test.py
│ │ ├── sagemaker_generate_test.py
│ │ ├── sagemaker_multi_model_test.py
│ │ ├── sagemaker_request_many_chunks.py
│ │ ├── sagemaker_test.py
│ │ └── test.sh
│ ├── L0_scalar_io/
│ │ ├── scalar_test.py
│ │ └── test.sh
│ ├── L0_sdk/
│ │ ├── grpc_test.cc
│ │ ├── http_test.cc
│ │ └── test.sh
│ ├── L0_secure_grpc/
│ │ └── test.sh
│ ├── L0_sequence_batcher/
│ │ ├── request_timeout_models/
│ │ │ └── custom_sequence_int32_timeout/
│ │ │ └── config.pbtxt
│ │ ├── sequence_batcher_test.py
│ │ └── test.sh
│ ├── L0_sequence_corrid_batcher/
│ │ ├── sequence_corrid_batcher_test.py
│ │ └── test.sh
│ ├── L0_sequence_stress/
│ │ ├── sequence_stress.py
│ │ └── test.sh
│ ├── L0_server_status/
│ │ ├── server_status_test.py
│ │ └── test.sh
│ ├── L0_shared_memory/
│ │ ├── shared_memory_test.py
│ │ └── test.sh
│ ├── L0_simple_ensemble/
│ │ ├── backpressure_test_models/
│ │ │ ├── decoupled_producer/
│ │ │ │ ├── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── config.pbtxt
│ │ │ └── ensemble_disabled_max_inflight_requests/
│ │ │ └── config.pbtxt
│ │ ├── ensemble_backpressure_test.py
│ │ ├── ensemble_test.py
│ │ ├── models/
│ │ │ ├── ensemble_add_sub_int32_int32_int32/
│ │ │ │ └── config.pbtxt
│ │ │ ├── ensemble_partial_add_sub/
│ │ │ │ └── config.pbtxt
│ │ │ ├── partial_add_sub/
│ │ │ │ ├── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── config.pbtxt
│ │ │ └── simple/
│ │ │ └── config.pbtxt
│ │ └── test.sh
│ ├── L0_simple_example/
│ │ └── test.sh
│ ├── L0_simple_go_client/
│ │ └── test.sh
│ ├── L0_simple_lib/
│ │ └── test.sh
│ ├── L0_simple_nodejs_client/
│ │ └── test.sh
│ ├── L0_socket/
│ │ ├── models/
│ │ │ └── simple/
│ │ │ └── config.pbtxt
│ │ └── test.sh
│ ├── L0_storage_S3/
│ │ └── test.sh
│ ├── L0_storage_S3_local/
│ │ ├── mock_s3_service.py
│ │ └── test.sh
│ ├── L0_storage_azure/
│ │ └── test.sh
│ ├── L0_storage_swiftstack/
│ │ ├── infer_test.py
│ │ └── test.sh
│ ├── L0_string_io/
│ │ ├── string_client_test.py
│ │ └── test.sh
│ ├── L0_trace/
│ │ ├── models/
│ │ │ └── input_all_required/
│ │ │ ├── 1/
│ │ │ │ └── model.py
│ │ │ └── config.pbtxt
│ │ ├── opentelemetry_unittest.py
│ │ ├── test.sh
│ │ ├── trace-config.yaml
│ │ ├── trace_context.py
│ │ ├── trace_endpoint_test.py
│ │ └── trace_stress_grpc_client.py
│ ├── L0_triton_repo_agent/
│ │ ├── models/
│ │ │ ├── chain_relocation/
│ │ │ │ └── config.pbtxt
│ │ │ └── relocation_sanity_check/
│ │ │ └── config.pbtxt
│ │ └── test.sh
│ ├── L0_trt_bf16_dtype/
│ │ ├── test.sh
│ │ └── trt_bf16_dtype_test.py
│ ├── L0_trt_compat/
│ │ ├── test.sh
│ │ └── trt_compatibility_test.py
│ ├── L0_trt_data_dependent_shape/
│ │ ├── test.sh
│ │ └── trt_data_dependent_shape_test.py
│ ├── L0_trt_dla/
│ │ ├── dla_test.py
│ │ └── test.sh
│ ├── L0_trt_dynamic_shape/
│ │ ├── test.sh
│ │ └── trt_dynamic_shape_test.py
│ ├── L0_trt_error_propagation/
│ │ ├── test.sh
│ │ └── trt_error_propagation_test.py
│ ├── L0_trt_plugin/
│ │ ├── test.sh
│ │ └── trt_plugin_test.py
│ ├── L0_trt_reformat_free/
│ │ ├── test.sh
│ │ └── trt_reformat_free_test.py
│ ├── L0_trt_shape_tensors/
│ │ ├── test.sh
│ │ └── trt_shape_tensor_test.py
│ ├── L0_vertex_ai/
│ │ ├── test.sh
│ │ └── vertex_ai_test.py
│ ├── L0_warmup/
│ │ ├── decoupled/
│ │ │ ├── 1/
│ │ │ │ └── model.py
│ │ │ └── config.pbtxt
│ │ ├── failing_infer/
│ │ │ ├── 1/
│ │ │ │ └── model.py
│ │ │ └── config.pbtxt
│ │ ├── raw_mug_data
│ │ └── test.sh
│ ├── common/
│ │ ├── busy_op_kernel.cu.cc
│ │ ├── check_copyright.py
│ │ ├── check_massif_log.py
│ │ ├── check_valgrind_log.py
│ │ ├── gen_common.py
│ │ ├── gen_ensemble_model_utils.py
│ │ ├── gen_jetson_trt_models
│ │ ├── gen_qa_custom_ops_models.py
│ │ ├── gen_qa_dyna_sequence_implicit_models.py
│ │ ├── gen_qa_dyna_sequence_models.py
│ │ ├── gen_qa_identity_models.py
│ │ ├── gen_qa_image_models.py
│ │ ├── gen_qa_implicit_models.py
│ │ ├── gen_qa_model_repository
│ │ ├── gen_qa_models.py
│ │ ├── gen_qa_ort_scalar_models.py
│ │ ├── gen_qa_pytorch_model.py
│ │ ├── gen_qa_ragged_models.py
│ │ ├── gen_qa_reshape_models.py
│ │ ├── gen_qa_sequence_models.py
│ │ ├── gen_qa_torchtrt_models.py
│ │ ├── gen_qa_trt_data_dependent_shape.py
│ │ ├── gen_qa_trt_format_models.py
│ │ ├── gen_qa_trt_plugin_models.py
│ │ ├── infer_test.py
│ │ ├── infer_util.py
│ │ ├── inferentia_perf_analyzer_input_data_json/
│ │ │ ├── non_aligned_validation_batched.json
│ │ │ ├── non_aligned_validation_no_batch.json
│ │ │ ├── simple_model.py
│ │ │ ├── validation_batched.json
│ │ │ ├── validation_no_batch.json
│ │ │ ├── wrong_validation_batched.json
│ │ │ └── wrong_validation_no_batch.json
│ │ ├── libtorch_infer_client.py
│ │ ├── nightly_email_helper.py
│ │ ├── orca_header_test.py
│ │ ├── perf_analyzer_input_data_json/
│ │ │ ├── float_data_with_shape.json
│ │ │ ├── image_data.json
│ │ │ ├── int_data.json
│ │ │ ├── int_data_diff_shape.json
│ │ │ ├── int_data_optional.json
│ │ │ ├── non_aligned_output.json
│ │ │ ├── output.json
│ │ │ ├── repeat_int32_data.json
│ │ │ ├── seq_data.json
│ │ │ ├── seq_output.json
│ │ │ ├── seq_wrong_output.json
│ │ │ ├── shape_tensor_data.json
│ │ │ ├── string_data.json
│ │ │ ├── string_data_with_shape.json
│ │ │ ├── wrong_output.json
│ │ │ └── wrong_output_2.json
│ │ ├── reporter.py
│ │ ├── resnet50_labels.txt
│ │ ├── run_all_tests.sh
│ │ ├── sequence_util.py
│ │ ├── shm_util.py
│ │ ├── show_testlogs
│ │ ├── test_util.py
│ │ ├── trace_summary.py
│ │ ├── trtllm_util.sh
│ │ └── util.sh
│ ├── custom_models/
│ │ ├── custom_dyna_sequence_int32/
│ │ │ └── config.pbtxt
│ │ ├── custom_sequence_int32/
│ │ │ └── config.pbtxt
│ │ └── custom_zero_1_float32/
│ │ └── config.pbtxt
│ ├── ensemble_models/
│ │ ├── batch_to_nobatch_float32_float32_float32/
│ │ │ └── config.pbtxt
│ │ ├── batch_to_nobatch_nobatch_float32_float32_float32/
│ │ │ └── config.pbtxt
│ │ ├── label_override_int32_float32_float32/
│ │ │ ├── config.pbtxt
│ │ │ └── output0_labels.txt
│ │ ├── mix_ensemble_int32_float32_float32/
│ │ │ └── config.pbtxt
│ │ ├── mix_nobatch_batch_float32_float32_float32/
│ │ │ └── config.pbtxt
│ │ ├── mix_platform_float32_float32_float32/
│ │ │ └── config.pbtxt
│ │ ├── mix_type_int32_float32_float32/
│ │ │ └── config.pbtxt
│ │ ├── nobatch_to_batch_float32_float32_float32/
│ │ │ └── config.pbtxt
│ │ ├── nobatch_to_batch_nobatch_float32_float32_float32/
│ │ │ └── config.pbtxt
│ │ └── wrong_label_int32_float32_float32/
│ │ ├── config.pbtxt
│ │ └── output0_labels.txt
│ ├── openvino_models/
│ │ ├── README.md
│ │ ├── dynamic_batch/
│ │ │ └── 1/
│ │ │ └── model.mapping
│ │ └── fixed_batch/
│ │ └── 1/
│ │ └── model.mapping
│ └── python_models/
│ ├── add_sub/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── add_sub_gpu/
│ │ └── config.pbtxt
│ ├── async_execute_decouple/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── async_execute_decouple_bls/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── auto_complete/
│ │ └── model.py
│ ├── auto_complete_error/
│ │ └── model.py
│ ├── bls/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── bls_async/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── bls_finalize_error/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── bls_init_error/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── bls_memory/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── bls_memory_async/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── bls_model_loading/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── bls_onnx_warmup/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── bls_parameters/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── bls_request_rescheduling/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── bls_simple/
│ │ └── bls_simple.py
│ ├── bls_undefined/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── busy_op/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── cuda_memory_consumer/
│ │ ├── 1/
│ │ │ └── model.py
│ │ └── config.pbtxt
│ ├── custom_metrics/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── delayed_model/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── dlpack_add_sub/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── dlpack_empty_output/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── dlpack_identity/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── dlpack_io_identity/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── dlpack_io_identity_decoupled/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── dlpack_square/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── dlpack_sub_add/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── dlpack_test/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── ensemble/
│ │ └── config.pbtxt
│ ├── ensemble_gpu/
│ │ └── config.pbtxt
│ ├── ensemble_io/
│ │ └── config.pbtxt
│ ├── error_code/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── execute_cancel/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── execute_delayed_model/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── execute_error/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── execute_grpc_error/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── execute_return_error/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── fan_add_sub/
│ │ └── config.pbtxt
│ ├── fini_error/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── generate_models/
│ │ └── mock_llm/
│ │ ├── 1/
│ │ │ └── model.py
│ │ └── config.pbtxt
│ ├── ground_truth/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── identity_bf16/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── identity_fp32/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── identity_fp32_logging/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── identity_fp32_timeout/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── init_args/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── init_error/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── init_exit/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── iterative_sequence/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── model_env/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── model_init_del/
│ │ ├── config.pbtxt
│ │ ├── model.py
│ │ └── util.py
│ ├── multi_file/
│ │ ├── file1.py
│ │ ├── file2.py
│ │ └── model.py
│ ├── non_contiguous/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── optional/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── python_based_backends/
│ │ └── add_sub_backend/
│ │ └── model.py
│ ├── python_version/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── pytorch_fp32_fp32/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── request_rescheduling_addsub/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── response_parameters/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── response_parameters_bls/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── response_parameters_decoupled/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── response_sender/
│ │ ├── config.pbtxt
│ │ ├── model.py
│ │ ├── model_async.py
│ │ └── model_common.py
│ ├── response_sender_complete_final/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── response_sender_error/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── response_sender_until_cancelled/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── sequence_int32/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── sequence_py/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── simple_identity_fp32/
│ │ └── config.pbtxt
│ ├── string/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── string_fixed/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── string_identity/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── sub_add/
│ │ └── model.py
│ ├── torchvision/
│ │ └── resnet50/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── variable_gpu_output/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── wrong_model/
│ │ ├── config.pbtxt
│ │ └── model.py
│ └── wrong_return_type/
│ ├── config.pbtxt
│ └── model.py
├── src/
│ ├── CMakeLists.txt
│ ├── classification.cc
│ ├── classification.h
│ ├── command_line_parser.cc
│ ├── command_line_parser.h
│ ├── common.cc
│ ├── common.h
│ ├── data_compressor.h
│ ├── grpc/
│ │ ├── CMakeLists.txt
│ │ ├── grpc_handler.h
│ │ ├── grpc_server.cc
│ │ ├── grpc_server.h
│ │ ├── grpc_utils.cc
│ │ ├── grpc_utils.h
│ │ ├── infer_handler.cc
│ │ ├── infer_handler.h
│ │ ├── stream_infer_handler.cc
│ │ └── stream_infer_handler.h
│ ├── http_server.cc
│ ├── http_server.h
│ ├── main.cc
│ ├── memory_alloc.cc
│ ├── multi_server.cc
│ ├── orca_http.cc
│ ├── orca_http.h
│ ├── python/
│ │ ├── CMakeLists.txt
│ │ ├── build_wheel.py
│ │ ├── examples/
│ │ │ ├── example.py
│ │ │ └── example_model_repository/
│ │ │ └── identity/
│ │ │ ├── 1/
│ │ │ │ └── model.onnx
│ │ │ └── config.pbtxt
│ │ ├── setup.py
│ │ └── tritonfrontend/
│ │ ├── CMakeLists.txt
│ │ ├── __init__.py
│ │ ├── __init__.pyi
│ │ ├── _api/
│ │ │ ├── __init__.py
│ │ │ ├── _error_mapping.py
│ │ │ ├── _kservegrpc.py
│ │ │ ├── _kservegrpc.pyi
│ │ │ ├── _kservehttp.py
│ │ │ ├── _kservehttp.pyi
│ │ │ ├── _metrics.py
│ │ │ └── _metrics.pyi
│ │ ├── _c/
│ │ │ ├── __init__.py
│ │ │ ├── __init__.pyi
│ │ │ ├── tritonfrontend.h
│ │ │ ├── tritonfrontend_bindings.pyi
│ │ │ └── tritonfrontend_pybind.cc
│ │ └── py.typed
│ ├── restricted_features.h
│ ├── sagemaker_server.cc
│ ├── sagemaker_server.h
│ ├── shared_memory_manager.cc
│ ├── shared_memory_manager.h
│ ├── simple.cc
│ ├── test/
│ │ ├── CMakeLists.txt
│ │ ├── data_compressor_test.cc
│ │ ├── distributed_addsub/
│ │ │ ├── CMakeLists.txt
│ │ │ ├── cmake/
│ │ │ │ └── TritonDistributedAddsubBackendConfig.cmake.in
│ │ │ └── src/
│ │ │ ├── distributed_addsub.cc
│ │ │ └── libtriton_distributed_addsub.ldscript
│ │ ├── dyna_sequence/
│ │ │ ├── CMakeLists.txt
│ │ │ ├── cmake/
│ │ │ │ └── TritonDynaSequenceBackendConfig.cmake.in
│ │ │ └── src/
│ │ │ ├── dyna_sequence.cc
│ │ │ └── libtriton_dyna_sequence.ldscript
│ │ ├── implicit_state/
│ │ │ ├── CMakeLists.txt
│ │ │ ├── cmake/
│ │ │ │ └── TritonImplicitStateBackendConfig.cmake.in
│ │ │ └── src/
│ │ │ ├── implicit_state.cc
│ │ │ └── libtriton_implicit_state.ldscript
│ │ ├── iterative_sequence/
│ │ │ ├── CMakeLists.txt
│ │ │ ├── cmake/
│ │ │ │ └── TritonIterativeSequenceBackendConfig.cmake.in
│ │ │ └── src/
│ │ │ ├── iterative_sequence.cc
│ │ │ └── libtriton_iterative_sequence.ldscript
│ │ ├── models/
│ │ │ ├── identity_fp32/
│ │ │ │ └── config.pbtxt
│ │ │ ├── repeat_int32/
│ │ │ │ └── config.pbtxt
│ │ │ └── square_int32/
│ │ │ └── config.pbtxt
│ │ ├── query_backend/
│ │ │ ├── CMakeLists.txt
│ │ │ ├── cmake/
│ │ │ │ └── TritonQueryBackendConfig.cmake.in
│ │ │ └── src/
│ │ │ ├── libtriton_query.ldscript
│ │ │ └── query.cc
│ │ ├── repoagent/
│ │ │ └── relocation_repoagent/
│ │ │ ├── CMakeLists.txt
│ │ │ ├── cmake/
│ │ │ │ └── TritonRelocationRepoAgentConfig.cmake.in
│ │ │ └── src/
│ │ │ ├── libtritonrepoagent_relocation.ldscript
│ │ │ └── relocation.cc
│ │ ├── sequence/
│ │ │ ├── CMakeLists.txt
│ │ │ ├── cmake/
│ │ │ │ └── TritonSequenceBackendConfig.cmake.in
│ │ │ └── src/
│ │ │ ├── libtriton_sequence.ldscript
│ │ │ └── sequence.cc
│ │ └── tensor_size_test.cc
│ ├── tracer.cc
│ ├── tracer.h
│ ├── triton_signal.cc
│ ├── triton_signal.h
│ ├── vertex_ai_server.cc
│ └── vertex_ai_server.h
└── tools/
└── add_copyright.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .clang-format
================================================
---
BasedOnStyle: Google
IndentWidth: 2
ColumnLimit: 80
ContinuationIndentWidth: 4
UseTab: Never
MaxEmptyLinesToKeep: 2
SortIncludes: true
CompactNamespaces: true
ReflowComments: true
DerivePointerAlignment: false
PointerAlignment: Left
AllowShortIfStatementsOnASingleLine: false
AllowShortBlocksOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AlwaysBreakAfterReturnType: TopLevelDefinitions
AlignAfterOpenBracket: AlwaysBreak
BreakBeforeBraces: Custom
BraceWrapping:
  AfterClass: false
  AfterControlStatement: false
  AfterEnum: false
  AfterFunction: true
  AfterNamespace: false
  AfterStruct: false
  AfterUnion: false
  BeforeCatch: true
BinPackArguments: true
BinPackParameters: true
ConstructorInitializerAllOnOneLineOrOnePerLine: false
IndentCaseLabels: true
================================================
FILE: .dockerignore
================================================
.git*
================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.md
================================================
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: ''
assignees: ''
---
**Description**
A clear and concise description of what the bug is.
**Triton Information**
What version of Triton are you using?
Are you using the Triton container or did you build it yourself?
**To Reproduce**
Steps to reproduce the behavior.
Describe the models (framework, inputs, outputs), ideally include the model configuration file (if using an ensemble include the model configuration file for that as well).
**Expected behavior**
A clear and concise description of what you expected to happen.
================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.md
================================================
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: ''
assignees: ''
---
**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
**Describe the solution you'd like**
A clear and concise description of what you want to happen.
**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.
**Additional context**
Add any other context or screenshots about the feature request here.
================================================
FILE: .github/PULL_REQUEST_TEMPLATE/pull_request_template_external_contrib.md
================================================
#### What does the PR do?
<!-- Describe your pull request here. Please read the text below the line, and make sure you follow the checklist.-->
#### Checklist
- [ ] I have read the [Contribution guidelines](https://github.com/triton-inference-server/server/blob/main/CONTRIBUTING.md) and signed the [Contributor License
Agreement](https://github.com/NVIDIA/triton-inference-server/blob/master/Triton-CCLA-v1.pdf)
- [ ] PR title reflects the change and is of format `<commit_type>: <Title>`
- [ ] Changes are described in the pull request.
- [ ] Related issues are referenced.
- [ ] Populated [github labels](https://docs.github.com/en/issues/using-labels-and-milestones-to-track-work/managing-labels) field
- [ ] Added [test plan](#test-plan) and verified test passes.
- [ ] Verified that the PR passes existing CI.
- [ ] I ran pre-commit locally (`pre-commit install, pre-commit run --all`)
- [ ] Verified copyright is correct on all changed files.
- [ ] Added _succinct_ git squash message before merging [ref](https://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html).
- [ ] All template sections are filled out.
- [ ] Optional: Additional screenshots for behavior/output changes with before/after.
#### Commit Type:
Check the [conventional commit type](https://github.com/angular/angular/blob/22b96b9/CONTRIBUTING.md#type)
box here and add the label to the github PR.
- [ ] build
- [ ] ci
- [ ] docs
- [ ] feat
- [ ] fix
- [ ] perf
- [ ] refactor
- [ ] revert
- [ ] style
- [ ] test
#### Related PRs:
<!-- Related PRs from other Repositories -->
#### Where should the reviewer start?
<!-- call out specific files that should be looked at closely -->
#### Test plan:
<!-- list steps to verify feature works -->
<!-- were e2e tests added?-->
#### Caveats:
<!-- any limitations or possible things missing from this PR -->
#### Background
<!-- e.g. what led to this change being made. this is optional extra information to help the reviewer -->
#### Related Issues: (use one of the action keywords Closes / Fixes / Resolves / Relates to)
- closes GitHub issue: #xxx
================================================
FILE: .github/PULL_REQUEST_TEMPLATE/pull_request_template_internal_contrib.md
================================================
#### What does the PR do?
<!-- Describe your pull request here. Please read the text below the line, and make sure you follow the checklist.-->
#### Checklist
- [ ] PR title reflects the change and is of format `<commit_type>: <Title>`
- [ ] Changes are described in the pull request.
- [ ] Related issues are referenced.
- [ ] Populated [github labels](https://docs.github.com/en/issues/using-labels-and-milestones-to-track-work/managing-labels) field
- [ ] Added [test plan](#test-plan) and verified test passes.
- [ ] Verified that the PR passes existing CI.
- [ ] Verified copyright is correct on all changed files.
- [ ] Added _succinct_ git squash message before merging [ref](https://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html).
- [ ] All template sections are filled out.
- [ ] Optional: Additional screenshots for behavior/output changes with before/after.
#### Commit Type:
Check the [conventional commit type](https://github.com/angular/angular/blob/22b96b9/CONTRIBUTING.md#type)
box here and add the label to the github PR.
- [ ] build
- [ ] ci
- [ ] docs
- [ ] feat
- [ ] fix
- [ ] perf
- [ ] refactor
- [ ] revert
- [ ] style
- [ ] test
#### Related PRs:
<!-- Related PRs from other Repositories -->
#### Where should the reviewer start?
<!-- call out specific files that should be looked at closely -->
#### Test plan:
<!-- list steps to verify -->
<!-- were e2e tests added?-->
- CI Pipeline ID:
<!-- Only Pipeline ID and no direct link here -->
#### Caveats:
<!-- any limitations or possible things missing from this PR -->
#### Background
<!-- e.g. what led to this change being made. this is optional extra information to help the reviewer -->
#### Related Issues: (use one of the action keywords Closes / Fixes / Resolves / Relates to)
- closes GitHub issue: #xxx
================================================
FILE: .github/pull_request_template.md
================================================
Thanks for submitting a PR to Triton!
Please go to the `Preview` tab above this description box and select the appropriate sub-template:
* [PR description template for Triton Engineers](?expand=1&template=pull_request_template_internal_contrib.md)
* [PR description template for External Contributors](?expand=1&template=pull_request_template_external_contrib.md)
If you already created the PR, please replace this message with one of
* [External contribution template](https://raw.githubusercontent.com/triton-inference-server/server/main/.github/PULL_REQUEST_TEMPLATE/pull_request_template_external_contrib.md)
* [Internal contribution template](https://raw.githubusercontent.com/triton-inference-server/server/main/.github/PULL_REQUEST_TEMPLATE/pull_request_template_internal_contrib.md)
and fill it out.
================================================
FILE: .github/workflows/codeql.yml
================================================
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
name: "CodeQL"
on:
pull_request:
jobs:
analyze:
name: Analyze
runs-on: ubuntu-latest
permissions:
actions: read
contents: read
security-events: write
strategy:
fail-fast: false
matrix:
language: [ 'python' ]
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
# Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
steps:
- name: Checkout repository
uses: actions/checkout@v3
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v2
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
# By default, queries listed here will override any specified in a config file.
# Prefix the list here with "+" to use these queries and those in the config file.
# Details on CodeQL's query packs refer to:
# https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
queries: +security-and-quality
# Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@v2
# Command-line programs to run using the OS shell.
# See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
# If the Autobuild fails above, remove it and uncomment the following three lines.
# modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance.
# - run: |
# echo "Run, Build Application using script"
# ./location_of_script_within_repo/buildscript.sh
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v2
with:
category: "/language:${{matrix.language}}"
================================================
FILE: .github/workflows/pre-commit.yml
================================================
# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
name: pre-commit
on:
pull_request:
jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5.0.0
with:
fetch-depth: 2
- name: Get modified files
id: modified-files
run: echo "modified_files=$(git diff --name-only -r HEAD^1 HEAD | xargs)" >> $GITHUB_OUTPUT
- uses: actions/setup-python@v6.0.0
- uses: pre-commit/action@v3.0.1
with:
extra_args: --files ${{ steps.modified-files.outputs.modified_files }}
================================================
FILE: .gitignore
================================================
/build
/builddir
/.vscode
*.so
__pycache__
tmp
*.log
*.xml
test_results.txt
artifacts
cprofile
*.prof
.venv
**/.venv
# Test exclusions
qa/L0_openai/openai
tensorrtllm_models
tensorrtllm_mistral_models/
custom_tokenizer
================================================
FILE: .pre-commit-config.yaml
================================================
# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
repos:
- repo: https://github.com/PyCQA/isort
rev: 5.12.0
hooks:
- id: isort
additional_dependencies: [toml]
- repo: https://github.com/psf/black
rev: 23.1.0
hooks:
- id: black
types_or: [python, cython]
- repo: https://github.com/PyCQA/flake8
  rev: 7.3.0
  hooks:
  - id: flake8
    # Every argument is quoted on purpose: inside a YAML flow sequence an
    # unquoted comma terminates the item, so "--select=C,E,F" would reach
    # flake8 as "--select=C" followed by stray positional arguments "E", "F".
    # The option and its value are also joined with "=" (no spaces) so that
    # each one arrives as a single well-formed command-line argument.
    args: ["--max-line-length=88", "--select=C,E,F,W,B,B950", "--extend-ignore=E203,E501"]
    types_or: [python, cython]
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v16.0.5
hooks:
- id: clang-format
types_or: [c, c++, cuda, proto, textproto, java]
args: ["-fallback-style=none", "-style=file", "-i"]
- repo: https://github.com/codespell-project/codespell
rev: v2.2.4
hooks:
- id: codespell
additional_dependencies: [tomli]
args: ["--toml", "pyproject.toml"]
exclude: (?x)^(.*stemmer.*|.*stop_words.*|^CHANGELOG.md$)
# More details about these pre-commit hooks here:
# https://pre-commit.com/hooks.html
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v6.0.0
hooks:
- id: check-case-conflict
- id: check-executables-have-shebangs
- id: check-merge-conflict
- id: check-json
- id: check-toml
- id: check-yaml
exclude: ^deploy(\/[^\/]+)*\/templates\/.*$
- id: check-shebang-scripts-are-executable
- id: end-of-file-fixer
types_or: [c, c++, cuda, proto, textproto, java, python]
- id: mixed-line-ending
- id: requirements-txt-fixer
- id: trailing-whitespace
- repo: local
hooks:
- id: add-license
name: Add License
entry: python tools/add_copyright.py
language: python
stages: [pre-commit]
verbose: true
require_serial: true
================================================
FILE: CITATION.cff
================================================
cff-version: 1.2.0
message: "If you use this software, please cite it as below."
title: "Triton Inference Server: An Optimized Cloud and Edge Inferencing Solution."
url: https://github.com/triton-inference-server
repository-code: https://github.com/triton-inference-server/server
authors:
- name: "NVIDIA Corporation"
================================================
FILE: CMakeLists.txt
================================================
# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmake_minimum_required(VERSION 3.31.8)
project(tritonserver LANGUAGES C CXX)
include(CMakeDependentOption)
# Use C++17 standard as Triton's minimum required.
set(TRITON_MIN_CXX_STANDARD 17 CACHE STRING "The minimum C++ standard which features are requested to build this target.")
set(TRITON_VERSION "0.0.0" CACHE STRING "The version of the Triton shared library" )
option(TRITON_ENABLE_LOGGING "Include logging support in server" ON)
option(TRITON_ENABLE_STATS "Include statistics collections in server" ON)
option(TRITON_ENABLE_TRACING "Include tracing support in server" OFF)
option(TRITON_ENABLE_NVTX "Include NVTX support in server" OFF)
option(TRITON_ENABLE_GPU "Enable GPU support in server" ON)
option(TRITON_ENABLE_MALI_GPU "Enable Arm Mali GPU support in server" OFF)
# Forwarded unchanged to the triton-server external project as
# -DTRITON_IGPU_BUILD.
option(TRITON_IGPU_BUILD "Enable options for iGPU compilation in server" OFF)
set(TRITON_MIN_COMPUTE_CAPABILITY "7.5" CACHE STRING
"The minimum CUDA compute capability supported by Triton" )
set(TRITON_EXTRA_LIB_PATHS "" CACHE PATH "Extra library paths for Triton Server build")
# Ensemble
option(TRITON_ENABLE_ENSEMBLE "Include ensemble support in server" OFF)
# Endpoints
option(TRITON_ENABLE_HTTP "Include HTTP API in server" ON)
option(TRITON_ENABLE_GRPC "Include GRPC API in server" ON)
option(TRITON_ENABLE_SAGEMAKER "Include AWS SageMaker API in server" OFF)
option(TRITON_ENABLE_VERTEX_AI "Include Vertex AI API in server" OFF)
# Metrics
option(TRITON_ENABLE_METRICS "Include metrics support in server" ON)
option(TRITON_ENABLE_METRICS_GPU "Include GPU metrics support in server" ON)
option(TRITON_ENABLE_METRICS_CPU "Include CPU metrics support in server" ON)
# Cloud storage
option(TRITON_ENABLE_GCS "Include GCS Filesystem support in server" OFF)
option(TRITON_ENABLE_S3 "Include S3 Filesystem support in server" OFF)
option(TRITON_ENABLE_AZURE_STORAGE "Include Azure Storage Filesystem support in server" OFF)
# Need to know if TensorRT is available when building unit tests
option(TRITON_ENABLE_TENSORRT "Include TensorRT backend in server" OFF)
# ASAN
option(TRITON_ENABLE_ASAN "Build with address sanitizer" OFF)
# Repo tags
set(TRITON_REPO_ORGANIZATION "https://github.com/triton-inference-server" CACHE STRING "Git repository to pull from")
set(TRITON_THIRD_PARTY_REPO_TAG "main" CACHE STRING
"Tag for triton-inference-server/third_party repo")
set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo")
set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/backend repo")
# Third-party location
set(TRITON_THIRD_PARTY_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/third-party" CACHE STRING "Location of third-party build")
set(TRITON_THIRD_PARTY_SRC_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/third-party-src" CACHE STRING "Location of third-party source")
# Reject inconsistent option combinations up front. The checks run in a fixed
# order and the first violated one aborts configuration.

# Metrics and tracing both require statistics.
if(NOT TRITON_ENABLE_STATS)
  if(TRITON_ENABLE_METRICS)
    message(FATAL_ERROR "TRITON_ENABLE_METRICS=ON requires TRITON_ENABLE_STATS=ON")
  endif()
  if(TRITON_ENABLE_TRACING)
    message(FATAL_ERROR "TRITON_ENABLE_TRACING=ON requires TRITON_ENABLE_STATS=ON")
  endif()
endif()

# CPU and GPU metrics are refinements of the general metrics support.
if(NOT TRITON_ENABLE_METRICS)
  if(TRITON_ENABLE_METRICS_CPU)
    message(FATAL_ERROR "TRITON_ENABLE_METRICS_CPU=ON requires TRITON_ENABLE_METRICS=ON")
  endif()
  if(TRITON_ENABLE_METRICS_GPU)
    message(FATAL_ERROR "TRITON_ENABLE_METRICS_GPU=ON requires TRITON_ENABLE_METRICS=ON")
  endif()
endif()

# GPU metrics additionally need GPU support itself.
if(TRITON_ENABLE_METRICS_GPU AND NOT TRITON_ENABLE_GPU)
  message(FATAL_ERROR "TRITON_ENABLE_METRICS_GPU=ON requires TRITON_ENABLE_GPU=ON")
endif()

# The address sanitizer build is only accepted with GPU support disabled.
if(TRITON_ENABLE_ASAN AND TRITON_ENABLE_GPU)
  message(FATAL_ERROR "TRITON_ENABLE_ASAN=ON requires TRITON_ENABLE_GPU=OFF")
endif()
#
# Dependencies
#
include(FetchContent)
FetchContent_Declare(
repo-core
GIT_REPOSITORY ${TRITON_REPO_ORGANIZATION}/core.git
GIT_TAG ${TRITON_CORE_REPO_TAG}
)
FetchContent_Declare(
repo-third-party
GIT_REPOSITORY ${TRITON_REPO_ORGANIZATION}/third_party.git
GIT_TAG ${TRITON_THIRD_PARTY_REPO_TAG}
)
# Some libs are installed to ${TRITON_THIRD_PARTY_INSTALL_PREFIX}/{LIB}/lib64 instead
# of ${TRITON_THIRD_PARTY_INSTALL_PREFIX}/{LIB}/lib on Centos.
# The distribution family is read from the ID_LIKE entry of /etc/os-release;
# LIB_DIR stays "lib" whenever that file or entry is absent.
set(LIB_DIR "lib")
if(LINUX AND EXISTS "/etc/os-release")
  file(STRINGS "/etc/os-release" DISTRO_ID_LIKE REGEX "ID_LIKE")
  # The expansion must be quoted: when no ID_LIKE line exists the variable is
  # empty, and an unquoted empty expansion would leave if() without its
  # left-hand operand and abort configuration.
  if("${DISTRO_ID_LIKE}" MATCHES "rhel|centos")
    set(LIB_DIR "lib64")
  endif()
endif()
set(TRITON_CORE_HEADERS_ONLY OFF)
FetchContent_MakeAvailable(repo-third-party repo-core)
#
# Triton server executable and examples
#
# Need to use ExternalProject for our builds so that we can get the
# correct dependencies between Triton executable and the
# ExternalProject dependencies (found in the third_party repo)
include(ExternalProject)
# Hints forwarded to the external projects. Each variable holds a complete
# "-D<name>:<type>=<value>" argument when the corresponding hint is set and is
# empty otherwise, so it can be dropped into an argument list unconditionally.

# CMAKE_TOOLCHAIN_FILE
if(CMAKE_TOOLCHAIN_FILE)
  set(_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE "-DCMAKE_TOOLCHAIN_FILE:PATH=${CMAKE_TOOLCHAIN_FILE}")
else()
  set(_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE "")
endif()

# VCPKG_TARGET_TRIPLET
if(VCPKG_TARGET_TRIPLET)
  set(_CMAKE_ARGS_VCPKG_TARGET_TRIPLET "-DVCPKG_TARGET_TRIPLET:STRING=${VCPKG_TARGET_TRIPLET}")
else()
  set(_CMAKE_ARGS_VCPKG_TARGET_TRIPLET "")
endif()

# OPENSSL_ROOT_DIR (for the external projects with an OpenSSL dependency)
if(OPENSSL_ROOT_DIR)
  set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "-DOPENSSL_ROOT_DIR:PATH=${OPENSSL_ROOT_DIR}")
else()
  set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "")
endif()
# Platform-dependent locations of the package config files produced by the
# third-party builds.
if(WIN32)
  # protobuf-config.cmake location on Windows
  set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/protobuf/cmake")
  # Triton with Opentelemetry is not supported on Windows
  # FIXME: add location for Windows, when support is added
  # JIRA DLIS-4786
  set(_FINDPACKAGE_OPENTELEMETRY_CONFIG_DIR "")
else()
  set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/protobuf/${LIB_DIR}/cmake/protobuf")
  set(_FINDPACKAGE_OPENTELEMETRY_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/opentelemetry-cpp/${LIB_DIR}/cmake/opentelemetry-cpp")
endif()

# Install into <build dir>/install unless the user chose an install prefix.
if(NOT CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
  set(TRITON_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX})
else()
  set(TRITON_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/install)
endif()
# External projects that the triton-server external project depends on. The
# first four are always required; the others are added per enabled feature.
#
# The conditions test the option variables by name instead of expanding them
# first: this matches the option checks earlier in this file and stays valid
# even if an option is given an empty value on the command line.
set(TRITON_DEPENDS triton-core protobuf googletest re2)
if(TRITON_ENABLE_GCS)
  list(APPEND TRITON_DEPENDS google-cloud-cpp)
endif() # TRITON_ENABLE_GCS
if(TRITON_ENABLE_S3)
  list(APPEND TRITON_DEPENDS aws-sdk-cpp)
endif() # TRITON_ENABLE_S3
if(TRITON_ENABLE_HTTP OR TRITON_ENABLE_METRICS OR TRITON_ENABLE_SAGEMAKER OR TRITON_ENABLE_VERTEX_AI)
  list(APPEND TRITON_DEPENDS libevent libevhtp)
endif() # TRITON_ENABLE_HTTP || TRITON_ENABLE_METRICS || TRITON_ENABLE_SAGEMAKER || TRITON_ENABLE_VERTEX_AI
if(TRITON_ENABLE_GRPC)
  list(APPEND TRITON_DEPENDS grpc)
endif() # TRITON_ENABLE_GRPC
# Opentelemetry is only pulled in for tracing, and never on Windows.
if(NOT WIN32 AND TRITON_ENABLE_TRACING)
  list(APPEND TRITON_DEPENDS opentelemetry-cpp)
endif() # TRITON_ENABLE_TRACING
# Configure and build the server sources in src/ as an external project, once
# every project listed in TRITON_DEPENDS has been built. All settings are
# handed over as initial cache entries.
ExternalProject_Add(triton-server
  PREFIX triton-server
  SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src"
  BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/triton-server"
  CMAKE_CACHE_ARGS
    # Package locations and optional hints (the three hint variables are
    # empty when the corresponding hint is not set).
    -DProtobuf_DIR:PATH=${_FINDPACKAGE_PROTOBUF_CONFIG_DIR}
    ${_CMAKE_ARGS_OPENSSL_ROOT_DIR}
    ${_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE}
    ${_CMAKE_ARGS_VCPKG_TARGET_TRIPLET}
    -DGTEST_ROOT:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/googletest
    -DgRPC_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/grpc/lib/cmake/grpc
    -Dc-ares_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/c-ares/${LIB_DIR}/cmake/c-ares
    -Dre2_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/re2/${LIB_DIR}/cmake/re2
    -Dabsl_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/absl/${LIB_DIR}/cmake/absl
    -DCURL_DIR:STRING=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/curl/${LIB_DIR}/cmake/CURL
    -Dnlohmann_json_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/nlohmann_json/share/cmake/nlohmann_json
    -DLibevent_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/libevent/lib/cmake/libevent
    -Dlibevhtp_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/libevhtp/lib/cmake/libevhtp
    -Dstorage_client_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/google-cloud-cpp/${LIB_DIR}/cmake/storage_client
    -Dgoogle_cloud_cpp_common_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/google-cloud-cpp/${LIB_DIR}/cmake/google_cloud_cpp_common
    -DCrc32c_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/crc32c/${LIB_DIR}/cmake/Crc32c
    -DAWSSDK_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/cmake/AWSSDK
    -Daws-cpp-sdk-core_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/cmake/aws-cpp-sdk-core
    -Daws-cpp-sdk-s3_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/cmake/aws-cpp-sdk-s3
    -Daws-c-event-stream_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/aws-c-event-stream/cmake
    -Daws-c-common_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/aws-c-common/cmake
    -Daws-checksums_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/aws-checksums/cmake
    -Dopentelemetry-cpp_DIR:PATH=${_FINDPACKAGE_OPENTELEMETRY_CONFIG_DIR}
    # Repository organization and tags.
    -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION}
    -DTRITON_IGPU_BUILD:BOOL=${TRITON_IGPU_BUILD}
    -DTRITON_THIRD_PARTY_REPO_TAG:STRING=${TRITON_THIRD_PARTY_REPO_TAG}
    -DTRITON_COMMON_REPO_TAG:STRING=${TRITON_COMMON_REPO_TAG}
    -DTRITON_CORE_REPO_TAG:STRING=${TRITON_CORE_REPO_TAG}
    -DTRITON_BACKEND_REPO_TAG:STRING=${TRITON_BACKEND_REPO_TAG}
    # Feature switches and build settings, passed through from this project.
    -DTRITON_EXTRA_LIB_PATHS:PATH=${TRITON_EXTRA_LIB_PATHS}
    -DTRITON_ENABLE_ASAN:BOOL=${TRITON_ENABLE_ASAN}
    -DTRITON_ENABLE_NVTX:BOOL=${TRITON_ENABLE_NVTX}
    -DTRITON_ENABLE_TRACING:BOOL=${TRITON_ENABLE_TRACING}
    -DTRITON_ENABLE_LOGGING:BOOL=${TRITON_ENABLE_LOGGING}
    -DTRITON_ENABLE_STATS:BOOL=${TRITON_ENABLE_STATS}
    -DTRITON_ENABLE_GPU:BOOL=${TRITON_ENABLE_GPU}
    -DTRITON_ENABLE_MALI_GPU:BOOL=${TRITON_ENABLE_MALI_GPU}
    -DTRITON_ENABLE_HTTP:BOOL=${TRITON_ENABLE_HTTP}
    -DTRITON_ENABLE_SAGEMAKER:BOOL=${TRITON_ENABLE_SAGEMAKER}
    -DTRITON_ENABLE_VERTEX_AI:BOOL=${TRITON_ENABLE_VERTEX_AI}
    -DTRITON_ENABLE_GRPC:BOOL=${TRITON_ENABLE_GRPC}
    -DTRITON_MIN_COMPUTE_CAPABILITY:STRING=${TRITON_MIN_COMPUTE_CAPABILITY}
    -DTRITON_ENABLE_METRICS:BOOL=${TRITON_ENABLE_METRICS}
    -DTRITON_ENABLE_METRICS_GPU:BOOL=${TRITON_ENABLE_METRICS_GPU}
    -DTRITON_ENABLE_METRICS_CPU:BOOL=${TRITON_ENABLE_METRICS_CPU}
    -DTRITON_ENABLE_GCS:BOOL=${TRITON_ENABLE_GCS}
    -DTRITON_ENABLE_AZURE_STORAGE:BOOL=${TRITON_ENABLE_AZURE_STORAGE}
    -DTRITON_ENABLE_S3:BOOL=${TRITON_ENABLE_S3}
    -DTRITON_ENABLE_TENSORRT:BOOL=${TRITON_ENABLE_TENSORRT}
    -DTRITON_ENABLE_ENSEMBLE:BOOL=${TRITON_ENABLE_ENSEMBLE}
    -DTRITON_MIN_CXX_STANDARD:STRING=${TRITON_MIN_CXX_STANDARD}
    -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
    -DCMAKE_INSTALL_PREFIX:PATH=${TRITON_INSTALL_PREFIX}
    -DTRITON_VERSION:STRING=${TRITON_VERSION}
  DEPENDS ${TRITON_DEPENDS}
)
================================================
FILE: CONTRIBUTING.md
================================================
<!--
# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->
# Contribution Guidelines
Contributions that fix documentation errors or that make small changes
to existing code can be contributed directly by following the rules
below and submitting an appropriate PR.
Contributions intended to add significant new functionality must
follow a more collaborative path described in the following
points. Before submitting a large PR that adds a major enhancement or
extension, be sure to submit a GitHub issue that describes the
proposed change so that the Triton team can provide feedback.
- As part of the GitHub issue discussion, a design for your change
will be agreed upon. An up-front design discussion is required to
ensure that your enhancement is done in a manner that is consistent
with Triton's overall architecture.
- The Triton project is spread across multiple repos. The Triton team
will provide guidance about how and where your enhancement should be
implemented.
- [Testing](docs/customization_guide/test.md) is a critical part of any Triton
enhancement. You should plan on spending significant time on
creating tests for your change. The Triton team will help you to
design your testing so that it is compatible with existing testing
infrastructure.
- If your enhancement provides a user visible feature then you need to
provide documentation.
# Contribution Rules
- The code style convention is enforced by clang-format. See below on
how to ensure your contributions conform. In general please follow
the existing conventions in the relevant file, submodule, module,
and project when you add new code or when you extend/fix existing
functionality.
- Avoid introducing unnecessary complexity into existing code so that
maintainability and readability are preserved.
- Try to keep pull requests (PRs) as concise as possible:
- Avoid committing commented-out code.
- Wherever possible, each PR should address a single concern. If
there are several otherwise-unrelated things that should be fixed
to reach a desired endpoint, it is perfectly fine to open several
PRs and state in the description which PR depends on another
PR. The more complex the changes are in a single PR, the more time
it will take to review those changes.
- Make sure that the build log is clean, meaning no warnings or
errors should be present.
- Make sure all `L0_*` tests pass:
- In the `qa/` directory, there are basic sanity tests scripted in
directories named `L0_...`. See the [Test](docs/customization_guide/test.md)
documentation for instructions on running these tests.
- Triton Inference Server's default build assumes recent versions of
dependencies (CUDA, PyTorch, TensorRT,
etc.). Contributions that add compatibility with older versions of
those dependencies will be considered, but NVIDIA cannot guarantee
that all possible build configurations work, are not broken by
future contributions, and retain highest performance.
- Make sure that you can contribute your work to open source (no
license and/or patent conflict is introduced by your code). You need
to complete the CLA described below before your PR can be merged.
- Thanks in advance for your patience as we review your contributions;
we do appreciate them!
# Coding Convention
All pull requests are checked against the
[pre-commit hooks](https://github.com/pre-commit/pre-commit-hooks)
located [in the repository's top-level .pre-commit-config.yaml](.pre-commit-config.yaml).
The hooks do some sanity checking like linting and formatting.
These checks must pass to merge a change.
To run these locally, you can
[install pre-commit](https://pre-commit.com/#install),
then run `pre-commit install` inside the cloned repo. When you
commit a change, the pre-commit hooks will run automatically.
If a pre-commit hook modifies a file to apply a fix, stage the file
again with `git add` and run `git commit` a second time; the hooks
will then pass and the commit will succeed.
# Contributor License Agreement (CLA)
Triton requires that all contributors (or their corporate entity) send
a signed copy of the [Contributor License
Agreement](https://github.com/NVIDIA/triton-inference-server/blob/master/Triton-CCLA-v1.pdf)
to triton-cla@nvidia.com.
*NOTE*: Contributors with no company affiliation can fill `N/A` in the
`Corporation Name` and `Corporation Address` fields.
================================================
FILE: Dockerfile.QA
================================================
# Copyright 2018-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ARG BASE_IMAGE=tritonserver
ARG CIBASE_IMAGE=tritonserver_cibase
ARG SDK_IMAGE=tritonserver_sdk
# Use HTTPS so that dependency repositories are never requested over
# plaintext HTTP. Overridable with --build-arg.
ARG TRITON_REPO_ORGANIZATION=https://github.com/triton-inference-server
ARG TRITON_COMMON_REPO_TAG=main
ARG TRITON_CORE_REPO_TAG=main
ARG TRITON_THIRD_PARTY_REPO_TAG=main
ARG TRITON_BACKEND_REPO_TAG=main
ARG TRITONTMP_DIR=/tmp
ARG IGPU_BUILD=0
############################################################################
## Test artifacts built as part of the tritonserver build are
## available in CIBASE_IMAGE. Copy these artifacts into the QA area.
############################################################################
FROM ${CIBASE_IMAGE} AS cibase
ARG TRITONTMP_DIR
ARG TRITON_REPO_ORGANIZATION
ARG TRITON_COMMON_REPO_TAG
ARG TRITON_CORE_REPO_TAG
ARG TRITON_THIRD_PARTY_REPO_TAG
ARG TRITON_BACKEND_REPO_TAG
ARG IGPU_BUILD
# Ensure apt-get won't prompt for selecting options
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential \
libarchive-dev \
libboost-dev \
python3-dev \
python3-pip \
python3-wheel \
python3-setuptools \
python3-venv \
rapidjson-dev \
software-properties-common && \
rm -rf /var/lib/apt/lists/*
RUN pip3 install cmake==4.0.3
ENV CMAKE_POLICY_VERSION_MINIMUM=3.5
# Add densenet_onnx model to example repo
# FIXME: This should be changed to using the fetch_models.sh script
# in order to ensure the public facing docs are up-to-date.
WORKDIR /workspace/docs/examples/model_repository
RUN mkdir -p densenet_onnx/1 && \
wget -O densenet_onnx/1/model.onnx \
https://github.com/onnx/models/raw/main/validated/vision/classification/densenet-121/model/densenet-7.onnx
# Update the qa/ directory with test executables, models, etc.
WORKDIR /workspace
RUN mkdir -p qa/common && \
cp -r /workspace/src/test/models/repeat_int32 qa/L0_decoupled/models/ && \
cp -r /workspace/src/test/models/square_int32 qa/L0_decoupled/models/ && \
mkdir qa/L0_simple_example/models && \
cp -r docs/examples/model_repository/simple qa/L0_simple_example/models/. && \
mkdir qa/L0_simple_go_client/models && \
cp -r docs/examples/model_repository/simple qa/L0_simple_go_client/models/. && \
mkdir qa/L0_backend_release/simple_models && \
cp -r docs/examples/model_repository/simple qa/L0_backend_release/simple_models/. && \
mkdir qa/L0_simple_nodejs_client/models && \
cp -r docs/examples/model_repository/simple qa/L0_simple_nodejs_client/models/. && \
mkdir qa/L0_backend_release/simple_seq_models && \
cp -r /workspace/docs/examples/model_repository/simple_sequence qa/L0_backend_release/simple_seq_models/. && \
mkdir qa/L0_shared_memory/models && \
cp -r docs/examples/model_repository/simple qa/L0_shared_memory/models/. && \
mkdir qa/L0_cuda_shared_memory/models && \
cp -r docs/examples/model_repository/simple qa/L0_cuda_shared_memory/models/. && \
mkdir qa/L0_client_java/models && \
cp -r docs/examples/model_repository/simple qa/L0_client_java/models && \
mkdir qa/L0_grpc/models && \
cp -r docs/examples/model_repository/simple qa/L0_grpc/models && \
cp -r docs/examples/model_repository/simple_dyna_sequence qa/L0_grpc/models && \
cp -r docs/examples/model_repository/simple_int8 qa/L0_grpc/models && \
cp -r docs/examples/model_repository/simple_identity qa/L0_grpc/models && \
cp -r docs/examples/model_repository/simple_sequence qa/L0_grpc/models && \
cp -r docs/examples/model_repository/simple_string qa/L0_grpc/models && \
cp -r docs/examples/model_repository/densenet_onnx qa/L0_grpc/models && \
mkdir qa/L0_grpc_state_cleanup/models && \
cp -r /workspace/src/test/models/repeat_int32 qa/L0_grpc_state_cleanup/models/ && \
mkdir qa/L0_http/models && \
cp -r docs/examples/model_repository/simple qa/L0_http/models && \
cp -r docs/examples/model_repository/simple_dyna_sequence qa/L0_http/models && \
cp -r docs/examples/model_repository/simple_identity qa/L0_http/models && \
cp -r docs/examples/model_repository/simple_sequence qa/L0_http/models && \
cp -r docs/examples/model_repository/simple_string qa/L0_http/models && \
cp -r docs/examples/model_repository/densenet_onnx qa/L0_http/models && \
mkdir qa/L0_https/models && \
cp -r docs/examples/model_repository/simple qa/L0_https/models/. && \
mkdir qa/L0_secure_grpc/models && \
cp -r docs/examples/model_repository/simple qa/L0_secure_grpc/models/. && \
cp bin/simple qa/L0_simple_lib/. && \
cp bin/memory_alloc qa/L0_io/. && \
cp bin/multi_server qa/L0_multi_server/. && \
cp bin/memory_test qa/L0_memory/. && \
cp bin/pinned_memory_manager_test qa/L0_memory/. && \
mkdir -p qa/L0_memory/python_models/repeat_int32/1 && \
cp bin/repo_agent_test qa/L0_triton_repo_agent/. && \
cp lib/libtritonrepoagent_relocation.so qa/L0_triton_repo_agent/. && \
mkdir qa/L0_query/models/query/1 && \
cp tritonbuild/tritonserver/backends/query/libtriton_query.so qa/L0_query/models/query/1/. && \
cp bin/query_test qa/L0_query/. && \
mkdir qa/L0_iterative_sequence/models/iterative_sequence/1 && \
cp tritonbuild/tritonserver/backends/iterative_sequence/libtriton_iterative_sequence.so qa/L0_iterative_sequence/models/iterative_sequence/1/. && \
cp bin/register_api_test qa/L0_register/. && \
cp bin/async_work_queue_test qa/L0_async_work_queue/. && \
cp tritonbuild/tritonserver/backends/implicit_state/libtriton_implicit_state.so \
qa/L0_implicit_state/. && \
mkdir qa/L0_data_compression/models && \
cp -r docs/examples/model_repository/simple qa/L0_data_compression/models && \
cp bin/data_compressor_test qa/L0_data_compression/. && \
cp bin/tensor_size_test qa/L0_input_validation/. && \
cp bin/metrics_api_test qa/L0_metrics/. && \
cp bin/response_cache_test qa/L0_response_cache/. && \
cp bin/request_cancellation_test qa/L0_request_cancellation/. && \
cp bin/triton_json_test qa/L0_json/. && \
cp bin/backend_output_detail_test qa/L0_backend_output_detail/. && \
cp -r deploy/mlflow-triton-plugin qa/L0_mlflow/. && \
cp bin/input_byte_size_test qa/L0_input_validation/.
RUN mkdir -p qa/pkgs && \
cp python/triton*.whl qa/pkgs/. && \
cp -rf python/test/. qa/L0_python_api/.
RUN mkdir -p qa/L0_simple_ensemble/models/simple/1 && \
cp docs/examples/model_repository/simple/1/model.onnx \
qa/L0_simple_ensemble/models/simple/1/. && \
mkdir -p qa/L0_simple_ensemble/models/simple/2 && \
cp docs/examples/model_repository/simple/1/model.onnx \
qa/L0_simple_ensemble/models/simple/2/. && \
mkdir -p qa/L0_socket/models/simple/1 && \
cp docs/examples/model_repository/simple/1/model.onnx \
qa/L0_socket/models/simple/1/.
RUN mkdir -p qa/L0_backend_identity/models && \
cp -r src/test/models/identity_fp32 qa/L0_backend_identity/models/. && \
mkdir -p qa/L0_backend_identity/models/identity_fp32/1
RUN mkdir -p qa/custom_models/custom_sequence_int32/1 && \
cp tritonbuild/tritonserver/backends/sequence/libtriton_sequence.so \
qa/custom_models/custom_sequence_int32/1/. && \
mkdir -p qa/custom_models/custom_dyna_sequence_int32/1 && \
cp tritonbuild/tritonserver/backends/dyna_sequence/libtriton_dyna_sequence.so \
qa/custom_models/custom_dyna_sequence_int32/1/.
# L0_lifecycle needs No-GPU build of identity backend.
# Reconfigure and rebuild the identity backend from a clean build directory
# with TRITON_ENABLE_GPU=OFF, installing into
# /workspace/tritonbuild/identity/install. The repo organization and repo
# tags are forwarded from the build args declared for this stage.
RUN cd tritonbuild/identity && \
rm -rf install build && mkdir build && cd build && \
cmake -DTRITON_ENABLE_GPU=OFF \
-DCMAKE_INSTALL_PREFIX:PATH=/workspace/tritonbuild/identity/install \
-DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} \
-DTRITON_COMMON_REPO_TAG:STRING=${TRITON_COMMON_REPO_TAG} \
-DTRITON_CORE_REPO_TAG:STRING=${TRITON_CORE_REPO_TAG} \
-DTRITON_THIRD_PARTY_REPO_TAG:STRING=${TRITON_THIRD_PARTY_REPO_TAG} \
-DTRITON_BACKEND_REPO_TAG:STRING=${TRITON_BACKEND_REPO_TAG} .. && \
make -j16 install
# L0_backend_python test require triton_shm_monitor
# URL of the Boost source archive, forwarded to the Python backend's CMake
# configure step below. Overridable with --build-arg.
ARG TRITON_BOOST_URL="https://archives.boost.io/release/1.80.0/source/boost_1_80_0.tar.gz"
# Reconfigure the Python backend from a clean build directory, installing
# into /workspace/tritonbuild/python/install, and build the
# triton-shm-monitor target together with the install target.
# NOTE(review): unlike the identity backend build, TRITON_THIRD_PARTY_REPO_TAG
# is not forwarded to this configure step - confirm that is intentional.
RUN cd tritonbuild/python && \
rm -rf install build && mkdir build && cd build && \
cmake -DCMAKE_INSTALL_PREFIX:PATH=/workspace/tritonbuild/python/install \
-DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} \
-DTRITON_COMMON_REPO_TAG:STRING=${TRITON_COMMON_REPO_TAG} \
-DTRITON_CORE_REPO_TAG:STRING=${TRITON_CORE_REPO_TAG} \
-DTRITON_BOOST_URL:STRING=${TRITON_BOOST_URL} \
-DTRITON_BACKEND_REPO_TAG:STRING=${TRITON_BACKEND_REPO_TAG} .. && \
make -j16 triton-shm-monitor install
RUN cp tritonbuild/identity/install/backends/identity/libtriton_identity.so \
qa/L0_lifecycle/. && \
cp tritonbuild/python/install/backends/python/triton_shm_monitor*.so \
qa/common/. && \
mkdir -p qa/L0_perf_nomodel/custom_models/custom_zero_1_float32/1 && \
mkdir -p qa/L0_perf_pyclients/custom_models/custom_zero_1_int32/1 && \
mkdir -p qa/L0_infer_shm && \
cp -r qa/L0_infer/. qa/L0_infer_shm && \
mkdir -p qa/L0_infer_cudashm && \
cp -r qa/L0_infer/. qa/L0_infer_cudashm && \
mkdir -p qa/L0_infer_valgrind && \
cp -r qa/L0_infer/. qa/L0_infer_valgrind && \
mkdir -p qa/L0_trt_shape_tensors_shm && \
cp -r qa/L0_trt_shape_tensors/. qa/L0_trt_shape_tensors_shm && \
mkdir -p qa/L0_trt_shape_tensors_cudashm && \
cp -r qa/L0_trt_shape_tensors/. qa/L0_trt_shape_tensors_cudashm && \
mkdir -p qa/L0_batcher_shm && \
cp -r qa/L0_batcher/. qa/L0_batcher_shm && \
mkdir -p qa/L0_batcher_cudashm && \
cp -r qa/L0_batcher/. qa/L0_batcher_cudashm && \
mkdir -p qa/L0_batcher_valgrind && \
cp -r qa/L0_batcher/. qa/L0_batcher_valgrind && \
mkdir -p qa/L0_sequence_batcher_shm && \
cp -r qa/L0_sequence_batcher/. qa/L0_sequence_batcher_shm && \
mkdir -p qa/L0_sequence_batcher_cudashm && \
cp -r qa/L0_sequence_batcher/. qa/L0_sequence_batcher_cudashm && \
mkdir -p qa/L0_sequence_batcher_valgrind && \
cp -r qa/L0_sequence_batcher/. qa/L0_sequence_batcher_valgrind && \
mkdir -p qa/L0_perf_nomodel_shm && \
cp -r qa/L0_perf_nomodel/. qa/L0_perf_nomodel_shm && \
mkdir -p qa/L0_perf_nomodel_cudashm && \
cp -r qa/L0_perf_nomodel/. qa/L0_perf_nomodel_cudashm
# L0_model_control_stress will not be present if gitlab tests are not available
RUN if [ -d qa/L0_model_control_stress ]; then \
mkdir -p qa/L0_model_control_stress_valgrind && \
cp -r qa/L0_model_control_stress/. qa/L0_model_control_stress_valgrind && \
mkdir -p qa/L0_model_control_stress_valgrind_massif && \
cp -r qa/L0_model_control_stress/. qa/L0_model_control_stress_valgrind_massif; \
fi
RUN mkdir -p qa/L0_decoupled/models/repeat_int32/1 && \
mkdir -p qa/L0_decoupled/models/square_int32/1 && \
mkdir -p qa/L0_decoupled/models/identity_int32/1 && \
mkdir -p qa/L0_decoupled/models/simple_repeat/1 && \
mkdir -p qa/L0_decoupled/models/fan_repeat/1 && \
mkdir -p qa/L0_decoupled/models/sequence_repeat/1 && \
mkdir -p qa/L0_decoupled/models/repeat_square/1 && \
mkdir -p qa/L0_decoupled/models/nested_square/1 && \
mkdir -p qa/L0_grpc_state_cleanup/models/repeat_int32/1
# Copy the repeat and square backend libraries into the test directories.
# Only done when IGPU_BUILD is "0"; any other value skips the copies.
# The comparison uses the POSIX "=" operator: "==" is a bash extension, and
# under a strict POSIX /bin/sh the test would error out, making the condition
# false and silently skipping the copies.
RUN if [ "$IGPU_BUILD" = "0" ]; then \
        cp backends/repeat/libtriton_repeat.so qa/L0_model_config && \
        cp backends/repeat/libtriton_repeat.so qa/L0_decoupled/models/repeat_int32/1 && \
        cp backends/repeat/libtriton_repeat.so qa/L0_grpc_state_cleanup/models/repeat_int32/1/. && \
        cp backends/square/libtriton_square.so qa/L0_decoupled/models/square_int32/1; \
    fi
RUN cp -r qa/L0_decoupled/models qa/L0_decoupled/python_models/ && \
cp /workspace/tritonbuild/python/examples/decoupled/repeat_model.py \
qa/L0_decoupled/python_models/repeat_int32/1/. && \
cp /workspace/tritonbuild/python/examples/decoupled/repeat_config.pbtxt \
qa/L0_decoupled/python_models/repeat_int32/. && \
cp /workspace/tritonbuild/python/examples/decoupled/square_model.py \
qa/L0_decoupled/python_models/square_int32/1/. && \
cp /workspace/tritonbuild/python/examples/decoupled/square_config.pbtxt \
qa/L0_decoupled/python_models/square_int32/. && \
cp /workspace/tritonbuild/python/examples/decoupled/repeat_model.py \
qa/L0_memory/python_models/repeat_int32/1/model.py && \
cp /workspace/tritonbuild/python/examples/decoupled/repeat_config.pbtxt \
qa/L0_memory/python_models/repeat_int32/config.pbtxt
RUN mkdir -p qa/L0_decoupled_grpc_error && \
cp -r qa/L0_decoupled/. qa/L0_decoupled_grpc_error
RUN mkdir -p qa/L0_grpc_error_state_cleanup && \
cp -r qa/L0_grpc_state_cleanup/. qa/L0_grpc_error_state_cleanup
RUN mkdir -p qa/L0_repoagent_checksum/models/identity_int32/1 && \
cp tritonbuild/identity/install/backends/identity/libtriton_identity.so \
qa/L0_repoagent_checksum/models/identity_int32/1/.
RUN mkdir -p qa/L0_passive_instance/models/distributed_int32_int32_int32/1 && \
cp tritonbuild/tritonserver/backends/distributed_addsub/libtriton_distributed_addsub.so \
qa/L0_passive_instance/models/distributed_int32_int32_int32/1/.
############################################################################
## Copy artifacts from sdk container
############################################################################
FROM ${SDK_IMAGE} AS sdk
ARG TARGETPLATFORM
WORKDIR /workspace
COPY --from=cibase /workspace/qa/ qa/
RUN mkdir -p qa/clients && mkdir -p qa/pkgs && \
cp -a install/bin/* qa/clients/. && \
cp install/lib/libgrpcclient.so qa/clients/. && \
cp install/lib/libhttpclient.so qa/clients/. && \
cp install/python/*.py qa/clients/. && \
cp install/python/triton*.whl qa/pkgs/. && \
cp install/java/examples/*.jar qa/clients/.
RUN cp client/src/grpc_generated/go/*.go qa/L0_simple_go_client/. && \
cp client/src/grpc_generated/javascript/*.js qa/L0_simple_nodejs_client/. && \
cp client/src/grpc_generated/javascript/*.json qa/L0_simple_nodejs_client/. && \
cp -r client/src/grpc_generated/java qa/L0_client_java/.
############################################################################
## Create CI enabled image
############################################################################
FROM $BASE_IMAGE
ARG TARGETPLATFORM
# Ensure apt-get won't prompt for selecting options
ENV DEBIAN_FRONTEND=noninteractive
# install platform specific packages
# Gate on the base image's OS release: when /etc/os-release has a VERSION_ID
# starting with one of the listed Ubuntu releases, install libpng-dev;
# otherwise print an error and fail the build.
# The apt package lists are left in place here; the next RUN refreshes them
# again and removes them afterwards.
RUN if grep -qE '^VERSION_ID="(18\.04|20\.04|22\.04|24\.04)' /etc/os-release; then \
apt-get update && \
apt-get install -y --no-install-recommends \
libpng-dev; \
else \
echo "Ubuntu version must be either 18.04, 20.04, 22.04 or 24.04" && \
exit 1; \
fi
# CI/QA for memcheck requires valgrind
# libarchive-dev is required by Python backend
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
gdb \
libopencv-dev \
libarchive-dev \
libopencv-core-dev \
libzmq3-dev \
openjdk-11-jdk \
nginx \
npm \
protobuf-compiler \
python3-dev \
python3-pip \
python3-protobuf \
python3-wheel \
python3-setuptools \
swig \
valgrind && \
rm -rf /var/lib/apt/lists/*
# CI/QA expects "python" executable (not python3).
RUN rm -f /usr/bin/python && \
ln -s /usr/bin/python3 /usr/bin/python
RUN pip3 install --upgrade "numpy<2" pillow attrdict future "grpcio<1.68" requests gsutil \
"awscli<=1.36.40" six "grpcio-channelz<1.68" prettytable virtualenv \
check-jsonschema
# go needed for example go client test.
# Pick the Go 1.22.3 tarball that matches the target platform (arm64 when
# TARGETPLATFORM is linux/arm64, amd64 for everything else), download it,
# replace any existing /usr/local/go with its contents, and delete the
# downloaded archive.
RUN case "$TARGETPLATFORM" in \
        linux/arm64) go_arch="arm64" ;; \
        *) go_arch="amd64" ;; \
    esac && \
    go_tarball="go1.22.3.linux-${go_arch}.tar.gz" && \
    wget "https://golang.org/dl/${go_tarball}" && \
    rm -rf /usr/local/go && \
    tar -C /usr/local -xzf "${go_tarball}" && \
    rm -f "${go_tarball}"
# Go workspace location, and PATH entries for the Go toolchain and for
# binaries installed with "go install". Written in key=value form; the
# space-separated "ENV key value" form is deprecated by Docker.
ENV GOPATH=/root/go
ENV PATH=$PATH:/usr/local/go/bin:$GOPATH/bin
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@latest && \
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
# CI expects tests in /opt/tritonserver/qa. The triton-server (1000)
# user should own all artifacts in case CI is run using triton-server
# user.
WORKDIR /opt/tritonserver
COPY --chown=1000:1000 --from=sdk /workspace/qa/ qa/
# Remove CI tests that are meant to run only on build image and
# install the tritonserver/triton python client APIs.
# Each matching tritonclient wheel is printed on its own line with the
# "[all]" extra appended, so that pip receives one requirement per wheel.
# Without the trailing newline, several matches would be concatenated into a
# single invalid requirement string.
RUN rm -fr qa/L0_copyrights qa/L0_build_variants && \
    find qa/pkgs/ -maxdepth 1 -type f -name \
        "tritonclient-*linux*.whl" | xargs printf -- '%s[all]\n' | \
        xargs pip3 install --upgrade
# Prepend the QA client directory to the library search path. Written in
# key=value form; the space-separated "ENV key value" form is deprecated by
# Docker.
ENV LD_LIBRARY_PATH=/opt/tritonserver/qa/clients:${LD_LIBRARY_PATH}
# DLIS-3631: Needed to run Perf Analyzer CI tests correctly
ENV LD_LIBRARY_PATH=/opt/hpcx/ompi/lib:${LD_LIBRARY_PATH}
# Required for PyTorch to pickup the correct HPCX libraries
ENV LD_LIBRARY_PATH=/opt/hpcx/ucc/lib/:/opt/hpcx/ucx/lib/:${LD_LIBRARY_PATH}
================================================
FILE: Dockerfile.sdk
================================================
# Copyright 2019-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Multistage build.
#
# Base image on the minimum Triton container
ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:26.02-py3-min
ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
# Use HTTPS so that dependency repositories are never requested over
# plaintext HTTP. Overridable with --build-arg.
ARG TRITON_REPO_ORGANIZATION=https://github.com/triton-inference-server
ARG TRITON_COMMON_REPO_TAG=main
ARG TRITON_CORE_REPO_TAG=main
ARG TRITON_CLIENT_REPO_TAG=main
ARG TRITON_THIRD_PARTY_REPO_TAG=main
ARG TRITON_ENABLE_GPU=ON
ARG JAVA_BINDINGS_MAVEN_VERSION=3.8.4
ARG JAVA_BINDINGS_JAVACPP_PRESETS_TAG=1.5.8
# DCGM version to install for Model Analyzer
ARG DCGM_VERSION=4.5.2-1
ARG NVIDIA_TRITON_SERVER_SDK_VERSION=unknown
ARG NVIDIA_BUILD_ID=unknown
############################################################################
## Build image
############################################################################
FROM ${BASE_IMAGE} AS sdk_build
# Ensure apt-get won't prompt for selecting options
ENV DEBIAN_FRONTEND=noninteractive
ENV PIP_BREAK_SYSTEM_PACKAGES=1 CMAKE_POLICY_VERSION_MINIMUM=3.5
RUN apt-get update && \
apt-get install -y --no-install-recommends \
autoconf \
automake \
build-essential \
ca-certificates \
curl \
git \
gperf \
libb64-dev \
libgoogle-perftools-dev \
libopencv-core-dev \
libopencv-dev \
libssl-dev \
libtool \
maven \
openjdk-11-jdk \
pkg-config \
python3 \
python3-dev \
python3-pdfkit \
python3-pip \
python3-setuptools \
python3-wheel \
rapidjson-dev \
software-properties-common \
vim \
wget && \
pip3 install --upgrade "grpcio-tools<1.68" cmake==4.0.3
# CMake reads the CMAKE_POLICY_VERSION_MINIMUM environment variable (there is
# no CMAKE_POLICY_MINIMUM_REQUIRED) to accept projects that declare an older
# cmake_minimum_required() version than the installed CMake supports.
ENV CMAKE_POLICY_VERSION_MINIMUM=3.5
# Build expects "python" executable (not python3).
RUN rm -f /usr/bin/python && \
ln -s /usr/bin/python3 /usr/bin/python
# Build the client library and examples
ARG TRITON_REPO_ORGANIZATION
ARG TRITON_CLIENT_REPO_SUBDIR
ARG TRITON_COMMON_REPO_TAG
ARG TRITON_CORE_REPO_TAG
ARG TRITON_CLIENT_REPO_TAG
ARG TRITON_THIRD_PARTY_REPO_TAG
ARG TRITON_ENABLE_GPU
ARG JAVA_BINDINGS_MAVEN_VERSION
ARG JAVA_BINDINGS_JAVACPP_PRESETS_TAG
ARG TARGETPLATFORM
WORKDIR /workspace
COPY TRITON_VERSION .
COPY ${TRITON_CLIENT_REPO_SUBDIR} client
WORKDIR /workspace/client_build
RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
-DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \
-DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \
-DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
-DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
-DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \
-DTRITON_ENABLE_PERF_ANALYZER=OFF \
-DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_CC_GRPC=ON \
-DTRITON_ENABLE_PYTHON_HTTP=ON -DTRITON_ENABLE_PYTHON_GRPC=ON \
-DTRITON_ENABLE_JAVA_HTTP=ON \
-DTRITON_ENABLE_EXAMPLES=ON -DTRITON_ENABLE_TESTS=ON \
-DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} /workspace/client
RUN cmake --build . -v --parallel --target cc-clients java-clients python-clients
# Install Java API Bindings
# Only performed when TARGETPLATFORM is linux/amd64; every other platform
# skips this step. The script receives the Maven version, the core repo tag,
# the JavaCPP presets tag and the jar install path as options.
# NOTE(review): "source" is not a POSIX sh builtin, so this relies on the
# image's /bin/sh accepting it (e.g. bash) - confirm for the base image.
RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
source /workspace/client/src/java-api-bindings/scripts/install_dependencies_and_build.sh \
--maven-version ${JAVA_BINDINGS_MAVEN_VERSION} \
--core-tag ${TRITON_CORE_REPO_TAG} \
--javacpp-tag ${JAVA_BINDINGS_JAVACPP_PRESETS_TAG} \
--jar-install-path /workspace/install/java-api-bindings; \
fi
############################################################################
## Create sdk container
############################################################################
FROM ${BASE_IMAGE}
# Ensure apt-get won't prompt for selecting options
ENV DEBIAN_FRONTEND=noninteractive
ENV PIP_BREAK_SYSTEM_PACKAGES=1
ARG DCGM_VERSION
ARG TRITON_REPO_ORGANIZATION
ARG TRITON_CORE_REPO_TAG
ARG TARGETPLATFORM
ARG TRITON_ENABLE_GPU
RUN apt-get update && \
apt-get install -y --no-install-recommends \
curl \
default-jdk \
git \
gperf \
libb64-dev \
libgoogle-perftools-dev \
libopencv-core-dev \
libopencv-dev \
libssl-dev \
libtool \
maven \
perl \
python3 \
python3-dev \
python3-pdfkit \
python3-pip \
python3-setuptools \
python3-wheel \
vim \
wget && \
pip3 install "grpcio<1.68" "grpcio-tools<1.68"
# All relative paths below resolve against /workspace.
WORKDIR /workspace
COPY TRITON_VERSION .
COPY NVIDIA_Deep_Learning_Container_License.pdf .
# Bring the client tree and the install tree over from the sdk_build stage.
COPY --from=sdk_build /workspace/client/ client/
COPY --from=sdk_build /workspace/install/ install/
# Archive the contents of install/ as /workspace/v<TRITON_VERSION>.clients.tar.gz,
# where the version string is read from the TRITON_VERSION file copied above.
RUN cd install && \
export VERSION=`cat /workspace/TRITON_VERSION` && \
tar zcf /workspace/v$VERSION.clients.tar.gz *
# For CI testing need to copy over L0_sdk test and L0_client_build_variants test.
RUN mkdir qa
COPY qa/L0_sdk qa/L0_sdk
COPY qa/L0_client_build_variants qa/L0_client_build_variants
# Create a directory for all the python client tests to enable unit testing
RUN mkdir -p qa/python_client_unit_tests/
# The test files themselves come from the client sources built in sdk_build.
COPY --from=sdk_build /workspace/client/src/python/library/tests/* qa/python_client_unit_tests/
# Install an image needed by the quickstart and other documentation.
COPY qa/images/mug.jpg images/mug.jpg
# Install the dependencies needed to run the client examples. These
# are not needed for building but including them allows this image to
# be used to run the client examples.
#
# The tritonclient wheel is located with find and installed together with
# its "[all]" extras. printf emits one requirement per line so that, should
# more than one wheel match, the second xargs hands pip separate arguments
# instead of a single concatenated string.
RUN pip3 install --upgrade "numpy<2" pillow attrdict && \
    find install/python/ -maxdepth 1 -type f -name \
        "tritonclient-*linux*.whl" | xargs printf -- '%s[all]\n' | \
    xargs pip3 install --upgrade

# Install GenAI-Perf
RUN pip3 install genai-perf
# Install DCGM (GPU-enabled builds only).
#
# The CUDA apt repository keyring is downloaded for the machine architecture
# ("x86_64", otherwise "sbsa"), installed, and then the DCGM packages are
# installed at the version given by DCGM_VERSION.
#  * curl --fail makes an HTTP error abort the step instead of saving the
#    error page as the .deb file.
#  * apt-get is used with --yes throughout because the "apt" front end does
#    not offer a stable interface for scripts.
#  * The apt package lists fetched here are removed in the same layer.
RUN if [ "$TRITON_ENABLE_GPU" = "ON" ]; then \
        if [ "$(uname -m)" = "x86_64" ]; then arch="x86_64"; else arch="sbsa"; fi && \
        curl --fail -o /tmp/cuda-keyring.deb \
            https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/$arch/cuda-keyring_1.1-1_all.deb && \
        apt-get install --yes /tmp/cuda-keyring.deb && \
        rm /tmp/cuda-keyring.deb && \
        apt-get update && \
        apt-get install --yes --no-install-recommends \
            datacenter-gpu-manager-4-core=1:${DCGM_VERSION} \
            datacenter-gpu-manager-4-dev=1:${DCGM_VERSION} && \
        rm -rf /var/lib/apt/lists/*; \
    fi
# Build expects "python" executable (not python3).
# Any existing /usr/bin/python is removed first so that ln does not fail.
RUN rm -f /usr/bin/python && \
ln -s /usr/bin/python3 /usr/bin/python
# Install Model Analyzer
# The pip requirement is "git+<organization>/model_analyzer@<tag>"; the default
# repository reference is assembled from TRITON_REPO_ORGANIZATION and the tag,
# and can be replaced as a whole through TRITON_MODEL_ANALYZER_REPO.
ARG TRITON_MODEL_ANALYZER_REPO_TAG
ARG TRITON_MODEL_ANALYZER_REPO="${TRITON_REPO_ORGANIZATION}/model_analyzer@${TRITON_MODEL_ANALYZER_REPO_TAG}"
RUN pip3 install "git+${TRITON_MODEL_ANALYZER_REPO}"
# Entrypoint Banner
ENV NVIDIA_PRODUCT_NAME="Triton Server SDK"
COPY docker/entrypoint.d/ /opt/nvidia/entrypoint.d/
# Adapt the copied banner for the SDK image: the first "Server" on each line
# becomes "Server SDK", and a leading "===" is widened to "=======" (four more
# characters, the length of " SDK"). The result is written to a temporary file
# and then moved over the original banner.
RUN sed 's/Server/Server SDK/' /opt/nvidia/entrypoint.d/10-banner.txt | \
sed 's/^===/=======/' > /opt/nvidia/entrypoint.d/10-banner.new && \
mv /opt/nvidia/entrypoint.d/10-banner.new /opt/nvidia/entrypoint.d/10-banner.txt
# Expose the SDK version and build id given as build arguments in the image
# environment.
ARG NVIDIA_TRITON_SERVER_SDK_VERSION
ARG NVIDIA_BUILD_ID
ENV NVIDIA_TRITON_SERVER_SDK_VERSION=${NVIDIA_TRITON_SERVER_SDK_VERSION}
ENV NVIDIA_BUILD_ID=${NVIDIA_BUILD_ID}
# Put the installed client binaries and libraries on the search paths.
ENV PATH=/workspace/install/bin:${PATH}
ENV LD_LIBRARY_PATH=/workspace/install/lib:${LD_LIBRARY_PATH}
# DLIS-3631: Needed to run Perf Analyzer CI tests correctly
ENV LD_LIBRARY_PATH=/opt/hpcx/ompi/lib:${LD_LIBRARY_PATH}
# Set TCMALLOC_RELEASE_RATE for users setting LD_PRELOAD with tcmalloc
ENV TCMALLOC_RELEASE_RATE=200
================================================
FILE: Dockerfile.win10.min
================================================
# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Windows min container for Triton build
ARG BASE_IMAGE=mcr.microsoft.com/windows:10.0.19042.1889

# Stage used to download and unpack the TensorRT and cuDNN archives; the
# build_base stage copies the unpacked directories out of it.
FROM ${BASE_IMAGE} AS dependency_base

RUN powershell.exe Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope LocalMachine
# Bootstrap Chocolatey. Only TLS 1.2 is enabled for the download: SSL 3.0,
# TLS 1.0 and TLS 1.1 are deprecated protocols and are not needed to reach
# chocolatey.org.
RUN powershell.exe [Net.ServicePointManager]::Expect100Continue=$true;[Net.ServicePointManager]::SecurityProtocol=[Net.SecurityProtocolType]::Tls12;Invoke-Expression( New-Object System.Net.WebClient ).DownloadString('https://chocolatey.org/install.ps1')
RUN choco install unzip -y
#
# Installing TensorRT
#
# The archive is fetched from TENSORRT_SOURCE at build time and stored as
# /tmp/${TENSORRT_ZIP}. The commented-out COPY below is the alternative of
# supplying a zip file that is already present in the build context.
# NOTE(review): the "10.8.0" path component and the "cuda-12.8" suffix in
# TENSORRT_SOURCE are hard-coded, so overriding TENSORRT_VERSION alone
# presumably also requires overriding TENSORRT_SOURCE - confirm.
ARG TENSORRT_VERSION=10.8.0.43
ARG TENSORRT_ZIP="TensorRT-${TENSORRT_VERSION}.Windows.win10.cuda-12.8.zip"
ARG TENSORRT_SOURCE=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.8.0/zip/TensorRT-${TENSORRT_VERSION}.Windows.win10.cuda-12.8.zip
# COPY ${TENSORRT_ZIP} /tmp/${TENSORRT_ZIP}
ADD ${TENSORRT_SOURCE} /tmp/${TENSORRT_ZIP}
# %TENSORRT_ZIP% is the cmd.exe expansion of the build argument above.
RUN unzip /tmp/%TENSORRT_ZIP%
# Give whatever matches TensorRT-* (presumably the extracted, versioned
# directory) the fixed name that the build_base stage copies from.
RUN move TensorRT-* TensorRT
LABEL TENSORRT_VERSION="${TENSORRT_VERSION}"
#
# Installing cuDNN
#
# CUDNN_SOURCE is derived from CUDNN_ZIP, and therefore from CUDNN_VERSION, so
# that overriding CUDNN_VERSION downloads the matching archive instead of
# always fetching one fixed version under a differently named file and label.
# With the default arguments the URL is the same as before.
ARG CUDNN_VERSION=9.7.1.26
ARG CUDNN_ZIP=cudnn-windows-x86_64-${CUDNN_VERSION}_cuda12-archive.zip
ARG CUDNN_SOURCE=https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/${CUDNN_ZIP}
ADD ${CUDNN_SOURCE} /tmp/${CUDNN_ZIP}
# %CUDNN_ZIP% is the cmd.exe expansion of the build argument above.
RUN unzip /tmp/%CUDNN_ZIP%
# Give whatever matches cudnn-* the fixed name that the build_base stage
# copies from.
RUN move cudnn-* cudnn
LABEL CUDNN_VERSION="${CUDNN_VERSION}"
# Stage that carries the build toolchain installed by the instructions below.
FROM ${BASE_IMAGE} AS build_base

SHELL ["cmd", "/S", "/C"]

# Downloads added with relative destinations below land in C:\tmp.
RUN mkdir c:\tmp
WORKDIR /tmp

RUN powershell.exe Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope LocalMachine
# Bootstrap Chocolatey. Only TLS 1.2 is enabled for the download: SSL 3.0,
# TLS 1.0 and TLS 1.1 are deprecated protocols and are not needed to reach
# chocolatey.org.
RUN powershell.exe [Net.ServicePointManager]::Expect100Continue=$true;[Net.ServicePointManager]::SecurityProtocol=[Net.SecurityProtocolType]::Tls12;Invoke-Expression( New-Object System.Net.WebClient ).DownloadString('https://chocolatey.org/install.ps1')
RUN choco install git docker unzip -y
#
# Installing python
#
# The installer is downloaded from PYTHON_SOURCE, whose default is built from
# PYTHON_VERSION.
ARG PYTHON_VERSION=3.12.3
ARG PYTHON_SOURCE=https://www.python.org/ftp/python/${PYTHON_VERSION}/python-${PYTHON_VERSION}-amd64.exe
ADD ${PYTHON_SOURCE} python-${PYTHON_VERSION}-amd64.exe
# Unattended install for all users into C:\python<version>, with the install
# location prepended to PATH and the documentation left out.
RUN python-%PYTHON_VERSION%-amd64.exe /quiet InstallAllUsers=1 PrependPath=1 Include_doc=0 TargetDir="C:\python%PYTHON_VERSION%"
# Create python3.exe as a link to python.exe in the install directory.
RUN mklink "C:\python%PYTHON_VERSION%\python3.exe" "C:\python%PYTHON_VERSION%\python.exe"
RUN pip install --upgrade wheel setuptools docker
LABEL PYTHON_VERSION=${PYTHON_VERSION}
#
# Installing CMake
#
# CMake is installed with pip at the version pinned by CMAKE_VERSION.
ARG CMAKE_VERSION=4.0.3
RUN pip install cmake==%CMAKE_VERSION%
# CMake and vcpkg settings placed in the image environment.
ENV CMAKE_POLICY_VERSION_MINIMUM=3.5 \
    CMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake \
    VCPKG_TARGET_TRIPLET=x64-windows
LABEL CMAKE_VERSION=${CMAKE_VERSION}
# Be aware that pip can interact badly with VS cmd shell so need to pip install before
# vsdevcmd.bat (see https://bugs.python.org/issue38989)
#
# Installing Visual Studio BuildTools: VS17 2022
#
# Download collect.exe in case of an install failure.
ADD https://aka.ms/vscollect.exe "C:\tmp\collect.exe"
# Download the Build Tools bootstrapper.
# The active BUILD_TOOLS_SOURCE below is a fixed download URL; the
# commented-out alternative is the alias for the latest release channel.
# For more control, specify the location of an internal layout.
# ARG BUILD_TOOLS_SOURCE=https://aka.ms/vs/17/release/vs_buildtools.exe
# BUILDTOOLS_VERSION is only recorded in the LABEL below; it does not take
# part in building the download URL.
ARG BUILDTOOLS_VERSION=17.12.35506.116
ARG BUILD_TOOLS_SOURCE=https://download.visualstudio.microsoft.com/download/pr/5536698c-711c-4834-876f-2817d31a2ef2/58894fc272e86d3c3a6d85bf3a1df1e5a0685be8b9ab65d9f3cc5c2a8c6921cc/vs_BuildTools.exe
ADD ${BUILD_TOOLS_SOURCE} vs_buildtools.exe
# Install Build Tools with the Microsoft.VisualStudio.Workload.VCTools workload, including recommended.
# VS_INSTALL_PATH_WP is also used later when the CUDA Visual Studio
# integration files are copied into the BuildTools tree.
ARG VS_INSTALL_PATH_WP="C:\BuildTools"
RUN vs_buildtools.exe --quiet --wait --norestart --nocache install \
--installPath %VS_INSTALL_PATH_WP% \
--add Microsoft.VisualStudio.Workload.VCTools \
--includeRecommended \
--locale "En-us"
LABEL BUILDTOOLS_VERSION=${BUILDTOOLS_VERSION}
WORKDIR /
#
# Installing Vcpkg
#
# NOTE: the build argument is spelled VCPGK_VERSION (sic). The spelling is
# left as is because it is the name that --build-arg callers have to pass.
ARG VCPGK_VERSION=2024.03.19
# Shallow clone of the single ref named by VCPGK_VERSION into /vcpkg, the
# location that CMAKE_TOOLCHAIN_FILE points into.
RUN git clone --single-branch --depth=1 -b %VCPGK_VERSION% https://github.com/microsoft/vcpkg.git
WORKDIR /vcpkg
RUN bootstrap-vcpkg.bat
RUN vcpkg.exe update
# Libraries installed for the x64-windows triplet, matching
# VCPKG_TARGET_TRIPLET.
RUN vcpkg.exe install \
boost-interprocess:x64-windows \
boost-stacktrace:x64-windows \
b64:x64-windows \
openssl-windows:x64-windows \
openssl:x64-windows \
pthread:x64-windows \
rapidjson:x64-windows \
zlib:x64-windows
RUN vcpkg.exe integrate install
LABEL VCPGK_VERSION=${VCPGK_VERSION}
WORKDIR /
#
# Installing CUDA
#
# CUDA_VERSION, the package list, the install root and the installer URL are
# all derived from the three version components below.
ARG CUDA_MAJOR=12
ARG CUDA_MINOR=8
ARG CUDA_PATCH=0
ARG CUDA_VERSION=${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_PATCH}
# Components passed to the network installer; each name carries the
# <major>.<minor> suffix.
ARG CUDA_PACKAGES="nvcc_${CUDA_MAJOR}.${CUDA_MINOR} \
cudart_${CUDA_MAJOR}.${CUDA_MINOR} \
nvml_dev_${CUDA_MAJOR}.${CUDA_MINOR} \
nvrtc_${CUDA_MAJOR}.${CUDA_MINOR} nvrtc_dev_${CUDA_MAJOR}.${CUDA_MINOR} \
cublas_${CUDA_MAJOR}.${CUDA_MINOR} cublas_dev_${CUDA_MAJOR}.${CUDA_MINOR} \
cufft_${CUDA_MAJOR}.${CUDA_MINOR} cufft_dev_${CUDA_MAJOR}.${CUDA_MINOR} \
curand_${CUDA_MAJOR}.${CUDA_MINOR} curand_dev_${CUDA_MAJOR}.${CUDA_MINOR} \
cusolver_${CUDA_MAJOR}.${CUDA_MINOR} cusolver_dev_${CUDA_MAJOR}.${CUDA_MINOR} \
cusparse_${CUDA_MAJOR}.${CUDA_MINOR} cusparse_dev_${CUDA_MAJOR}.${CUDA_MINOR} \
cupti_${CUDA_MAJOR}.${CUDA_MINOR} \
thrust_${CUDA_MAJOR}.${CUDA_MINOR} \
visual_studio_integration_${CUDA_MAJOR}.${CUDA_MINOR}"
# Directory the later copy steps treat as the toolkit root for this
# <major>.<minor> version.
ARG CUDA_INSTALL_ROOT_WP="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v${CUDA_MAJOR}.${CUDA_MINOR}"
ARG CUDA_SOURCE=https://developer.download.nvidia.com/compute/cuda/${CUDA_VERSION}/network_installers/cuda_${CUDA_VERSION}_windows_network.exe
ADD ${CUDA_SOURCE} cuda_${CUDA_VERSION}_windows_network.exe
# Run the installer with -s and the selected package list.
RUN cuda_%CUDA_VERSION%_windows_network.exe -s %CUDA_PACKAGES%
# Copy the CUDA visualstudio integration from where it was installed
# into the appropriate place in BuildTools
RUN copy "%CUDA_INSTALL_ROOT_WP%\extras\visual_studio_integration\MSBuildExtensions\*" "%VS_INSTALL_PATH_WP%\MSBuild\Microsoft\VC\v170\BuildCustomizations"
# Store a PATH value that starts with the CUDA bin directory.
RUN setx PATH "%CUDA_INSTALL_ROOT_WP%\bin;%PATH%"
ENV CUDA_VERSION=${CUDA_VERSION}
LABEL CUDA_VERSION="${CUDA_VERSION}"
# cuDNN: record the version, then copy the DLLs, import libraries and headers
# unpacked in the dependency_base stage into the CUDA toolkit directories.
ARG CUDNN_VERSION=9.7.1.26
ENV CUDNN_VERSION=${CUDNN_VERSION}
COPY --from=dependency_base /cudnn /cudnn
RUN copy cudnn\bin\cudnn*.dll "%CUDA_INSTALL_ROOT_WP%\bin\."
RUN copy cudnn\lib\x64\cudnn*.lib "%CUDA_INSTALL_ROOT_WP%\lib\x64\."
RUN copy cudnn\include\cudnn*.h "%CUDA_INSTALL_ROOT_WP%\include\."
LABEL CUDNN_VERSION="${CUDNN_VERSION}"

# TensorRT: record the version as TRT_VERSION, copy the directory unpacked in
# the dependency_base stage and put its lib directory at the front of PATH.
ARG TENSORRT_VERSION=10.8.0.43
ENV TRT_VERSION=${TENSORRT_VERSION}
COPY --from=dependency_base /TensorRT /TensorRT
RUN setx PATH "c:\TensorRT\lib;%PATH%"
LABEL TENSORRT_VERSION="${TENSORRT_VERSION}"
# It is important that the entrypoint initialize VisualStudio
# environment otherwise the build will fail. Also set
# CMAKE_TOOLCHAIN_FILE and VCPKG_TARGET_TRIPLET so
# that cmake can find the packages installed by vcpkg.
# NOTE(review): the path below hard-codes C:\BuildTools, which is the default
# of VS_INSTALL_PATH_WP; overriding that build argument would presumably
# require changing this line as well - confirm.
# NOTE(review): the trailing "&&" looks intentional, so that the container
# command is chained after vcvars64.bat - confirm before changing it.
ENTRYPOINT C:\BuildTools\VC\Auxiliary\Build\vcvars64.bat &&
================================================
FILE: LICENSE
================================================
Copyright (c) 2018-2026, NVIDIA CORPORATION. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of NVIDIA CORPORATION nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
================================================
FILE: README.md
================================================
<!--
# Copyright 2018-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->
[![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause)
>[!WARNING]
>You are currently on the `main` branch which tracks under-development progress
>towards the next release. The current release is version [2.66.0](https://github.com/triton-inference-server/server/releases/latest)
>and corresponds to the 26.02 container release on NVIDIA GPU Cloud (NGC).
# Triton Inference Server
Triton Inference Server is an open source inference serving software that
streamlines AI inferencing. Triton enables teams to deploy any AI model from
multiple deep learning and machine learning frameworks, including TensorRT,
PyTorch, ONNX, OpenVINO, Python, RAPIDS FIL, and more. Triton
Inference Server supports inference across cloud, data center, edge and embedded
devices on NVIDIA GPUs, x86 and ARM CPU, or AWS Inferentia. Triton Inference
Server delivers optimized performance for many query types, including real time,
batched, ensembles and audio/video streaming. Triton Inference Server is part of
[NVIDIA AI Enterprise](https://www.nvidia.com/en-us/data-center/products/ai-enterprise/),
a software platform that accelerates the data science pipeline and streamlines
the development and deployment of production AI.
Major features include:
- [Supports multiple deep learning
frameworks](https://github.com/triton-inference-server/backend#where-can-i-find-all-the-backends-that-are-available-for-triton)
- [Supports multiple machine learning
frameworks](https://github.com/triton-inference-server/fil_backend)
- [Concurrent model
execution](docs/user_guide/architecture.md#concurrent-model-execution)
- [Dynamic batching](docs/user_guide/batcher.md#dynamic-batcher)
- [Sequence batching](docs/user_guide/batcher.md#sequence-batcher) and
[implicit state management](docs/user_guide/architecture.md#implicit-state-management)
for stateful models
- Provides [Backend API](https://github.com/triton-inference-server/backend) that
allows adding custom backends and pre/post processing operations
- Supports writing custom backends in python, a.k.a.
[Python-based backends.](https://github.com/triton-inference-server/backend/blob/main/docs/python_based_backends.md#python-based-backends)
- Model pipelines using
[Ensembling](docs/user_guide/architecture.md#ensemble-models) or [Business
Logic Scripting
(BLS)](https://github.com/triton-inference-server/python_backend#business-logic-scripting)
- [HTTP/REST and GRPC inference
protocols](docs/customization_guide/inference_protocols.md) based on the community
developed [KServe
protocol](https://github.com/kserve/kserve/tree/master/docs/predict-api/v2)
- A [C API](docs/customization_guide/inprocess_c_api.md) and
[Java API](docs/customization_guide/inprocess_java_api.md)
allow Triton to link directly into your application for edge and other in-process use cases
- [Metrics](docs/user_guide/metrics.md) indicating GPU utilization, server
throughput, server latency, and more
**New to Triton Inference Server?** Make use of
[these tutorials](https://github.com/triton-inference-server/tutorials)
to begin your Triton journey!
Join the [Triton and TensorRT community](https://www.nvidia.com/en-us/deep-learning-ai/triton-tensorrt-newsletter/) and
stay current on the latest product updates, bug fixes, content, best practices,
and more. Need enterprise support? NVIDIA global support is available for Triton
Inference Server with the
[NVIDIA AI Enterprise software suite](https://www.nvidia.com/en-us/data-center/products/ai-enterprise/).
## Serve a Model in 3 Easy Steps
```bash
# Step 1: Create the example model repository
git clone -b r26.02 https://github.com/triton-inference-server/server.git
cd server/docs/examples
./fetch_models.sh
# Step 2: Launch triton from the NGC Triton container
docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:26.02-py3 tritonserver --model-repository=/models --model-control-mode explicit --load-model densenet_onnx
# Step 3: Sending an Inference Request
# In a separate console, launch the image_client example from the NGC Triton SDK container
docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:26.02-py3-sdk /workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg
# Inference should return the following
Image '/workspace/images/mug.jpg':
15.346230 (504) = COFFEE MUG
13.224326 (968) = CUP
10.422965 (505) = COFFEEPOT
```
Please read the [QuickStart](docs/getting_started/quickstart.md) guide for additional information
regarding this example. The quickstart guide also contains an example of how to launch Triton on [CPU-only systems](docs/getting_started/quickstart.md#run-on-cpu-only-system). New to Triton and wondering where to get started? Watch the [Getting Started video](https://youtu.be/NQDtfSi5QF4).
## Examples and Tutorials
Check out [NVIDIA LaunchPad](https://www.nvidia.com/en-us/data-center/products/ai-enterprise-suite/trial/)
for free access to a set of hands-on labs with Triton Inference Server hosted on
NVIDIA infrastructure.
Specific end-to-end examples for popular models, such as ResNet, BERT, and DLRM
are located in the
[NVIDIA Deep Learning Examples](https://github.com/NVIDIA/DeepLearningExamples)
page on GitHub. The
[NVIDIA Developer Zone](https://developer.nvidia.com/nvidia-triton-inference-server)
contains additional documentation, presentations, and examples.
## Documentation
### Build and Deploy
The recommended way to build and use Triton Inference Server is with Docker
images.
- [Install Triton Inference Server with Docker containers](docs/customization_guide/build.md#building-with-docker) (*Recommended*)
- [Install Triton Inference Server without Docker containers](docs/customization_guide/build.md#building-without-docker)
- [Build a custom Triton Inference Server Docker container](docs/customization_guide/compose.md)
- [Build Triton Inference Server from source](docs/customization_guide/build.md#building-on-unsupported-platforms)
- [Build Triton Inference Server for Windows 10](docs/customization_guide/build.md#building-for-windows-10)
- Examples for deploying Triton Inference Server with Kubernetes and Helm on [GCP](deploy/gcp/README.md),
[AWS](deploy/aws/README.md), and [NVIDIA FleetCommand](deploy/fleetcommand/README.md)
- [Secure Deployment Considerations](docs/customization_guide/deploy.md)
### Using Triton
#### Preparing Models for Triton Inference Server
The first step in using Triton to serve your models is to place one or
more models into a [model repository](docs/user_guide/model_repository.md). Depending on
the type of the model and on what Triton capabilities you want to enable for
the model, you may need to create a [model
configuration](docs/user_guide/model_configuration.md) for the model.
- [Add custom operations to Triton if needed by your model](docs/user_guide/custom_operations.md)
- Enable model pipelining with [Model Ensemble](docs/user_guide/architecture.md#ensemble-models)
and [Business Logic Scripting (BLS)](https://github.com/triton-inference-server/python_backend#business-logic-scripting)
- Optimize your models by setting [scheduling and batching](docs/user_guide/architecture.md#models-and-schedulers)
parameters and [model instances](docs/user_guide/model_configuration.md#instance-groups).
- Use the [Model Analyzer tool](https://github.com/triton-inference-server/model_analyzer)
to help optimize your model configuration with profiling
- Learn how to [explicitly manage what models are available by loading and
unloading models](docs/user_guide/model_management.md)
#### Configure and Use Triton Inference Server
- Read the [Quick Start Guide](docs/getting_started/quickstart.md) to run Triton Inference
Server on both GPU and CPU
- Triton supports multiple execution engines, called
[backends](https://github.com/triton-inference-server/backend#where-can-i-find-all-the-backends-that-are-available-for-triton), including
[TensorRT](https://github.com/triton-inference-server/tensorrt_backend),
[PyTorch](https://github.com/triton-inference-server/pytorch_backend),
[ONNX](https://github.com/triton-inference-server/onnxruntime_backend),
[OpenVINO](https://github.com/triton-inference-server/openvino_backend),
[Python](https://github.com/triton-inference-server/python_backend), and more
- Not all the above backends are supported on every platform supported by Triton.
Look at the
[Backend-Platform Support Matrix](https://github.com/triton-inference-server/backend/blob/main/docs/backend_platform_support_matrix.md)
to learn which backends are supported on your target platform.
- Learn how to [optimize performance](docs/user_guide/optimization.md) using the
[Performance Analyzer](https://github.com/triton-inference-server/perf_analyzer/blob/main/README.md)
and
[Model Analyzer](https://github.com/triton-inference-server/model_analyzer)
- Learn how to [manage loading and unloading models](docs/user_guide/model_management.md) in
Triton
- Send requests directly to Triton with the [HTTP/REST JSON-based
or gRPC protocols](docs/customization_guide/inference_protocols.md#httprest-and-grpc-protocols)
#### Client Support and Examples
A Triton *client* application sends inference and other requests to Triton. The
[Python and C++ client libraries](https://github.com/triton-inference-server/client)
provide APIs to simplify this communication.
- Review client examples for [C++](https://github.com/triton-inference-server/client/blob/main/src/c%2B%2B/examples),
[Python](https://github.com/triton-inference-server/client/blob/main/src/python/examples),
and [Java](https://github.com/triton-inference-server/client/blob/main/src/java/src/main/java/triton/client/examples)
- Configure [HTTP](https://github.com/triton-inference-server/client#http-options)
and [gRPC](https://github.com/triton-inference-server/client#grpc-options)
client options
- Send input data (e.g. a jpeg image) directly to Triton in the [body of an HTTP
request without any additional metadata](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_binary_data.md#raw-binary-request)
### Extend Triton
[Triton Inference Server's architecture](docs/user_guide/architecture.md) is specifically
designed for modularity and flexibility
- [Customize Triton Inference Server container](docs/customization_guide/compose.md) for your use case
- [Create custom backends](https://github.com/triton-inference-server/backend)
in either [C/C++](https://github.com/triton-inference-server/backend/blob/main/README.md#triton-backend-api)
or [Python](https://github.com/triton-inference-server/python_backend)
- Create [decoupled backends and models](docs/user_guide/decoupled_models.md) that can send
multiple responses for a request or not send any responses for a request
- Use a [Triton repository agent](docs/customization_guide/repository_agents.md) to add functionality
that operates when a model is loaded and unloaded, such as authentication,
decryption, or conversion
- Deploy Triton on [Jetson and JetPack](docs/user_guide/jetson.md)
- [Use Triton on AWS
Inferentia](https://github.com/triton-inference-server/python_backend/tree/main/inferentia)
### Additional Documentation
- [FAQ](docs/user_guide/faq.md)
- [User Guide](docs/README.md#user-guide)
- [Customization Guide](docs/README.md#customization-guide)
- [Release Notes](https://docs.nvidia.com/deeplearning/triton-inference-server/release-notes/index.html)
- [GPU, Driver, and CUDA Support
Matrix](https://docs.nvidia.com/deeplearning/dgx/support-matrix/index.html)
## Contributing
Contributions to Triton Inference Server are more than welcome. To
contribute please review the [contribution
guidelines](CONTRIBUTING.md). If you have a backend, client,
example or similar contribution that is not modifying the core of
Triton, then you should file a PR in the [contrib
repo](https://github.com/triton-inference-server/contrib).
## Reporting problems, asking questions
We appreciate any feedback, questions or bug reporting regarding this project.
When posting [issues in GitHub](https://github.com/triton-inference-server/server/issues),
follow the process outlined in the [Stack Overflow document](https://stackoverflow.com/help/mcve).
Ensure posted examples are:
- minimal – use as little code as possible that still produces the
same problem
- complete – provide all parts needed to reproduce the problem. Check
if you can strip external dependencies and still show the problem. The
less time we spend on reproducing problems the more time we have to
fix them
- verifiable – test the code you're about to provide to make sure it
reproduces the problem. Remove all other problems that are not
related to your request/question.
For issues, please use the provided bug report and feature request templates.
For questions, we recommend posting in our community
[GitHub Discussions.](https://github.com/triton-inference-server/server/discussions)
## For more information
Please refer to the [NVIDIA Developer Triton page](https://developer.nvidia.com/nvidia-triton-inference-server)
for more information.
================================================
FILE: SECURITY.md
================================================
<!--
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->
# Report a Security Vulnerability
To report a potential security vulnerability in any NVIDIA product, please use either:
* This web form: [Security Vulnerability Submission Form](https://www.nvidia.com/object/submit-security-vulnerability.html), or
* Send email to: [NVIDIA PSIRT](mailto:psirt@nvidia.com)
**OEM Partners should contact their NVIDIA Customer Program Manager**
If reporting a potential vulnerability via email, please encrypt it using NVIDIA’s public PGP key ([see PGP Key page](https://www.nvidia.com/en-us/security/pgp-key/)) and include the following information:
1. Product/Driver name and version/branch that contains the vulnerability
2. Type of vulnerability (code execution, denial of service, buffer overflow, etc.)
3. Instructions to reproduce the vulnerability
4. Proof-of-concept or exploit code
5. Potential impact of the vulnerability, including how an attacker could exploit the vulnerability
See https://www.nvidia.com/en-us/security/ for past NVIDIA Security Bulletins and Notices.
================================================
FILE: TRITON_VERSION
================================================
2.67.0dev
================================================
FILE: build.py
================================================
#!/usr/bin/env python3
# Copyright 2020-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import argparse
import importlib.util
import multiprocessing
import os
import os.path
import pathlib
import platform
import stat
import subprocess
import sys
from inspect import getsourcefile
import distro
import requests
#
# Build Triton Inference Server.
#
# By default build.py builds the Triton Docker image, but can also be
# used to build without Docker. See docs/build.md and --help for more
# information.
#
# The TRITON_VERSION file indicates the Triton version and
# DEFAULT_TRITON_VERSION_MAP is used to determine the corresponding container
# version and upstream container version (upstream containers are
# dependencies required by Triton). These versions may be overridden.
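# Map from component name to the default version used for this Triton release.
#
# release_version             -> Triton version
# triton_container_version    -> Triton container version
# upstream_container_version  -> upstream container version
# ort_version                 -> ORT version
# ort_openvino_version        -> ORT OpenVINO version (use None to disable OpenVINO in ORT)
# standalone_openvino_version -> Standalone OpenVINO version
# dcgm_version                -> DCGM version
# vllm_version                -> vLLM version
# rhel_py_version             -> Python version used for RHEL builds
#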
# Currently the OpenVINO versions used in ORT and standalone must
# match because of the way dlopen works with loading the backends. If
# different versions are used then one backend or the other will
# incorrectly load the other version of the openvino libraries.
#
DEFAULT_TRITON_VERSION_MAP = {
    "release_version": "2.67.0dev",
    "triton_container_version": "26.03dev",
    "upstream_container_version": "26.02",
    "ort_version": "1.24.2",
    "ort_openvino_version": "2026.0.0",
    "standalone_openvino_version": "2026.0.0",
    "dcgm_version": "4.5.2-1",
    "vllm_version": "0.16.0",
    "rhel_py_version": "3.12.3",
}

# NOTE(review): presumably the backends that are built as part of the core
# rather than from their own repository -- confirm against the callers.
CORE_BACKENDS = ["ensemble"]

# Parsed command-line flags. Stays None when this module is used by
# compose.py, so helpers that can be reached from there must tolerate None.
FLAGS = None

# name -> value cmake settings appended to the core build
# (see cmake_core_extra_args).
EXTRA_CORE_CMAKE_FLAGS = {}
# name -> value cmake settings that replace the computed value for the core
# build (see cmake_core_arg / cmake_core_enable).
OVERRIDE_CORE_CMAKE_FLAGS = {}
# backend -> {name -> value} cmake settings appended to that backend's build
# (see cmake_backend_extra_args).
EXTRA_BACKEND_CMAKE_FLAGS = {}
# backend -> {name -> value} cmake settings that replace the computed value
# for that backend's build (see cmake_backend_arg / cmake_backend_enable).
OVERRIDE_BACKEND_CMAKE_FLAGS = {}

# Absolute path of the directory containing this source file.
THIS_SCRIPT_DIR = os.path.dirname(os.path.abspath(getsourcefile(lambda: 0)))
def log(msg, force=False):
    """Print `msg` to stderr unless quiet mode suppresses it.

    Args:
        msg: Object to print.
        force: When true, print even if FLAGS.quiet is set (FLAGS is not
            consulted at all in that case).
    """
    if not force and FLAGS.quiet:
        return
    try:
        print(msg, file=sys.stderr)
    except Exception:
        # If printing the message itself raises, emit a placeholder instead.
        print("<failed to log>", file=sys.stderr)
def log_verbose(msg):
    """Log `msg` only when FLAGS.verbose is set; quiet mode is bypassed."""
    if not FLAGS.verbose:
        return
    log(msg, force=True)
def fail(msg):
    """Unconditionally report `msg` as an error via fail_if()."""
    fail_if(True, msg)
def fail_if(p, msg):
    """Exit with status 1 after printing `msg` to stderr when `p` is truthy.

    Args:
        p: Condition; nothing happens when it is falsy.
        msg: Text printed after the "error: " prefix.
    """
    if not p:
        return
    print(f"error: {msg}", file=sys.stderr)
    sys.exit(1)
def target_platform():
    """Return the platform being built for.

    An explicit FLAGS.target_platform wins. Otherwise the host system name
    (lower-cased) is used, except that Linux hosts are reported as "linux"
    when the distribution is Debian-like and as "rhel" otherwise.
    """
    # When called by compose.py, FLAGS will be None
    if FLAGS and FLAGS.target_platform is not None:
        return FLAGS.target_platform

    system_name = platform.system().lower()
    if system_name != "linux":
        return system_name

    # Need to inspect the /etc/os-release file to get
    # the distribution of linux
    return "linux" if "debian" in distro.like().split() else "rhel"
def target_machine():
    """Return FLAGS.target_machine when given, else the host machine type in lower case."""
    # When called by compose.py, FLAGS will be None
    requested = FLAGS.target_machine if FLAGS else None
    if requested is not None:
        return requested
    return platform.machine().lower()
def container_versions(version, container_version, upstream_container_version):
    """Fill in missing container versions from FLAGS.

    Args:
        version: Triton version (not used by this function).
        container_version: Triton container version, or None to take
            FLAGS.triton_container_version.
        upstream_container_version: Upstream container version, or None to
            take FLAGS.upstream_container_version.

    Returns:
        Tuple (container_version, upstream_container_version).
    """
    resolved_container = (
        FLAGS.triton_container_version
        if container_version is None
        else container_version
    )
    resolved_upstream = (
        FLAGS.upstream_container_version
        if upstream_container_version is None
        else upstream_container_version
    )
    return resolved_container, resolved_upstream
class BuildScript:
    """Utility class for writing build scripts

    The emitted script is PowerShell when target_platform() is "windows" and
    bash otherwise. Instances can be used as context managers; closing
    finishes the script and marks the file executable.
    """

    def __init__(self, filepath, desc=None, verbose=False):
        """Open `filepath` for writing and emit the script header.

        Args:
            filepath: Path of the script file to create (truncated if present).
            desc: Optional description written as a comment banner.
            verbose: When true, the script traces commands and verbose
                comments are written.
        """
        self._filepath = filepath
        self._file = open(self._filepath, "w")
        self._verbose = verbose
        self.header(desc)

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def __del__(self):
        self.close()

    def close(self):
        """Close the file"""
        # __del__ also runs when __init__ failed before '_file' was assigned,
        # so look the attribute up defensively instead of raising there.
        if getattr(self, "_file", None) is None:
            return
        if target_platform() == "windows":
            # Terminate the 'try {' block opened by header().
            self.blankln()
            self._file.write("}\n")
            self._file.write("catch {\n")
            self._file.write(" $_;\n")
            self._file.write(" ExitWithCode 1;\n")
            self._file.write("}\n")
        self._file.close()
        self._file = None
        # Make the finished script executable.
        st = os.stat(self._filepath)
        os.chmod(self._filepath, st.st_mode | stat.S_IEXEC)

    def blankln(self):
        """Write an empty line."""
        self._file.write("\n")

    def commentln(self, cnt):
        """Write a line consisting of `cnt` '#' characters."""
        self._file.write("#" * cnt + "\n")

    def comment(self, msg=""):
        """Write `msg` as a comment.

        A string is written as a single comment line. Any other iterable is
        written as one comment line per element. Non-iterable objects are
        written as a single line using their string form.
        """
        if not isinstance(msg, str):
            try:
                for m in msg:
                    # One line per element (previously the whole iterable was
                    # written on every iteration).
                    self._file.write(f"# {m}\n")
                return
            except TypeError:
                # Not iterable; fall through and write it as a single line.
                pass
        self._file.write(f"# {msg}\n")

    def comment_verbose(self, msg=""):
        """Write `msg` as a comment only in verbose mode."""
        if self._verbose:
            self.comment(msg)

    def header(self, desc=None):
        """Write the script preamble: shebang, description, fail-fast setup."""
        if target_platform() != "windows":
            self._file.write("#!/usr/bin/env bash\n\n")

        if desc is not None:
            self.comment()
            self.comment(desc)
            self.comment()
            self.blankln()

        self.comment("Exit script immediately if any command fails")
        if target_platform() == "windows":
            self._file.write("$UseStructuredOutput = $false\n")
            self.blankln()
            self._file.write("function ExitWithCode($exitcode) {\n")
            self._file.write(" $host.SetShouldExit($exitcode)\n")
            self._file.write(" exit $exitcode\n")
            self._file.write("}\n")
            self.blankln()
            if self._verbose:
                self._file.write("Set-PSDebug -Trace 1\n")
            self.blankln()
            # Closed by the 'catch' block that close() writes.
            self._file.write("try {\n")
        else:
            self._file.write("set -e\n")
            if self._verbose:
                self._file.write("set -x\n")
        self.blankln()

    def envvar_ref(self, v):
        """Return the shell expression that expands environment variable `v`."""
        if target_platform() == "windows":
            return f"${{env:{v}}}"
        return f"${{{v}}}"

    def cmd(self, clist, check_exitcode=False):
        """Write a command line.

        Args:
            clist: Either the full command as a string, or an iterable of
                words that are written separated (and followed) by a space.
            check_exitcode: On Windows, additionally emit a check that exits
                the script when $LASTEXITCODE is non-zero. Has no effect on
                other platforms.
        """
        if isinstance(clist, str):
            self._file.write(f"{clist}\n")
        else:
            for c in clist:
                self._file.write(f"{c} ")
            self.blankln()

        if check_exitcode:
            if target_platform() == "windows":
                self._file.write("if ($LASTEXITCODE -ne 0) {\n")
                self._file.write(
                    ' Write-Output "exited with status code $LASTEXITCODE";\n'
                )
                self._file.write(" ExitWithCode 1;\n")
                self._file.write("}\n")

    def cwd(self, path):
        """Emit a change-directory command."""
        if target_platform() == "windows":
            self.cmd(f"Set-Location -EV Err -EA Stop {path}")
        else:
            self.cmd(f"cd {path}")

    def cp(self, src, dest):
        """Emit a file copy command."""
        if target_platform() == "windows":
            self.cmd(f"Copy-Item -EV Err -EA Stop {src} -Destination {dest}")
        else:
            self.cmd(f"cp {src} {dest}")

    def mkdir(self, path):
        """Emit a command creating `path` and any missing parents."""
        if target_platform() == "windows":
            self.cmd(
                f"New-Item -EV Err -EA Stop -ItemType Directory -Force -Path {path}"
            )
        else:
            self.cmd(f"mkdir -p {pathlib.Path(path)}")

    def rmdir(self, path):
        """Emit a command recursively removing `path` if it exists."""
        if target_platform() == "windows":
            self.cmd(f"if (Test-Path -Path {path}) {{")
            self.cmd(f" Remove-Item -EV Err -EA Stop -Recurse -Force {path}")
            self.cmd("}")
        else:
            self.cmd(f"rm -fr {pathlib.Path(path)}")

    def cpdir(self, src, dest):
        """Emit a recursive directory copy command."""
        if target_platform() == "windows":
            self.cmd(f"Copy-Item -EV Err -EA Stop -Recurse {src} -Destination {dest}")
        else:
            self.cmd(f"cp -r {src} {dest}")

    def tar(self, subdir, tar_filename):
        """Emit a command archiving `subdir` into gzip tarball `tar_filename`.

        Not supported on Windows, where this fails the build.
        """
        if target_platform() == "windows":
            fail("unsupported operation: tar")
        else:
            self.cmd(f"tar zcf {tar_filename} {subdir}")

    def cmake(self, args):
        """Emit a cmake configure command with `args` (iterable of strings)."""
        # Pass some additional envvars into cmake...
        env_args = []
        for k in ("TRT_VERSION", "CMAKE_TOOLCHAIN_FILE", "VCPKG_TARGET_TRIPLET"):
            env_args += [f'"-D{k}={self.envvar_ref(k)}"']
        self.cmd(f'cmake {" ".join(env_args)} {" ".join(args)}', check_exitcode=True)

    def makeinstall(self, target="install"):
        """Emit a 'cmake --build' command for `target` using FLAGS settings."""
        verbose_flag = "-v" if self._verbose else ""
        self.cmd(
            f"cmake --build . --config {FLAGS.build_type} -j{FLAGS.build_parallel} {verbose_flag} -t {target}"
        )

    def gitclone(self, repo, tag, subdir, org):
        """Emit commands cloning `org`/`repo` at `tag` into `subdir`.

        Unless FLAGS.no_force_clone is set, any existing `subdir` is removed
        first; the clone itself only happens when `subdir` does not exist.
        """
        clone_dir = subdir
        if not FLAGS.no_force_clone:
            self.rmdir(clone_dir)

        if target_platform() == "windows":
            self.cmd(f"if (-Not (Test-Path -Path {clone_dir})) {{")
        else:
            self.cmd(f"if [[ ! -e {clone_dir} ]]; then")

        # FIXME [DLIS-4045 - Currently the tag starting with "pull/" is not
        # working with "--repo-tag" as the option is not forwarded to the
        # individual repo build correctly.]
        # If 'tag' starts with "pull/" then it must be of form
        # "pull/<pr>/head". We just clone at "main" and then fetch the
        # reference onto a new branch we name "tritonbuildref".
        if tag.startswith("pull/"):
            self.cmd(
                f" git clone --recursive --depth=1 {org}/{repo}.git {subdir}; git --git-dir {subdir}/.git log --oneline -1",
                check_exitcode=True,
            )
            self.cmd("}" if target_platform() == "windows" else "fi")
            self.cwd(subdir)
            self.cmd(f"git fetch origin {tag}:tritonbuildref", check_exitcode=True)
            self.cmd("git checkout tritonbuildref", check_exitcode=True)
        else:
            self.cmd(
                f" git clone --recursive --single-branch --depth=1 -b {tag} {org}/{repo}.git {subdir}; git --git-dir {subdir}/.git log --oneline -1",
                check_exitcode=True,
            )
            self.cmd("}" if target_platform() == "windows" else "fi")
def cmake_core_arg(name, type, value):
    """Return the quoted cmake -D setting name[:type]=value for the core build.

    A value given for `name` in OVERRIDE_CORE_CMAKE_FLAGS (the command-line
    override) replaces `value`. `type` may be None to omit the type suffix.
    """
    effective = OVERRIDE_CORE_CMAKE_FLAGS.get(name, value)
    type_suffix = "" if type is None else f":{type}"
    return f'"-D{name}{type_suffix}={effective}"'
def cmake_core_enable(name, flag):
    """Return the quoted cmake -D setting name:BOOL=ON|OFF for the core build.

    `flag` selects ON or OFF unless OVERRIDE_CORE_CMAKE_FLAGS carries a
    command-line value for `name`, which is then used verbatim.
    """
    setting = (
        OVERRIDE_CORE_CMAKE_FLAGS[name]
        if name in OVERRIDE_CORE_CMAKE_FLAGS
        else ("ON" if flag else "OFF")
    )
    return f'"-D{name}:BOOL={setting}"'
def cmake_core_extra_args():
    """Return one quoted -D setting per entry of EXTRA_CORE_CMAKE_FLAGS."""
    return [f'"-D{key}={val}"' for key, val in EXTRA_CORE_CMAKE_FLAGS.items()]
def cmake_backend_arg(backend, name, type, value):
    """Return the quoted cmake -D setting name[:type]=value for a backend build.

    A value given for `name` under `backend` in OVERRIDE_BACKEND_CMAKE_FLAGS
    (the command-line override) replaces `value`. `type` may be None to omit
    the type suffix.
    """
    overrides = OVERRIDE_BACKEND_CMAKE_FLAGS.get(backend, {})
    effective = overrides.get(name, value)
    type_suffix = "" if type is None else f":{type}"
    return f'"-D{name}{type_suffix}={effective}"'
def cmake_backend_enable(backend, name, flag):
    """Return the quoted cmake -D setting name:BOOL=ON|OFF for a backend build.

    A non-None value given for `name` under `backend` in
    OVERRIDE_BACKEND_CMAKE_FLAGS is used verbatim; otherwise `flag` selects
    ON or OFF.
    """
    setting = OVERRIDE_BACKEND_CMAKE_FLAGS.get(backend, {}).get(name)
    if setting is None:
        setting = "ON" if flag else "OFF"
    return f'"-D{name}:BOOL={setting}"'
def cmake_backend_extra_args(backend):
    """Return one quoted -D setting per extra flag registered for `backend`.

    The list is empty when EXTRA_BACKEND_CMAKE_FLAGS has no entry for it.
    """
    extras = EXTRA_BACKEND_CMAKE_FLAGS.get(backend, {})
    return [f'"-D{key}={val}"' for key, val in extras.items()]
def cmake_repoagent_arg(name, type, value):
    """Return the quoted cmake -D setting name[:type]=value for a repo-agent build.

    `type` may be None to omit the type suffix.
    """
    # For now there is no override for repo-agents
    type_suffix = "" if type is None else f":{type}"
    return f'"-D{name}{type_suffix}={value}"'
def cmake_repoagent_enable(name, flag):
    """Return the quoted cmake -D setting name:BOOL=ON|OFF for a repo-agent build."""
    # For now there is no override for repo-agents
    state = "ON" if flag else "OFF"
    return f'"-D{name}:BOOL={state}"'
def cmake_repoagent_extra_args():
    """Return the extra cmake settings for repo-agent builds (a new empty list)."""
    # For now there is no extra args for repo-agents
    return []
def cmake_cache_arg(name, type, value):
    """Return the quoted cmake -D setting name[:type]=value for a cache build.

    `type` may be None to omit the type suffix.
    """
    # For now there is no override for caches
    type_suffix = "" if type is None else f":{type}"
    return f'"-D{name}{type_suffix}={value}"'
def cmake_cache_enable(name, flag):
    """Return the quoted cmake -D setting name:BOOL=ON|OFF for a cache build."""
    # For now there is no override for caches
    state = "ON" if flag else "OFF"
    return f'"-D{name}:BOOL={state}"'
def cmake_cache_extra_args():
    """Return the extra cmake settings for cache builds (a new empty list)."""
    # For now there is no extra args for caches
    return []
def core_cmake_args(components, backends, cmake_dir, install_dir):
    """Assemble the cmake command-line arguments for the core build.

    Args:
        components: Mapping providing the repo tags for "common", "core",
            "backend" and "thirdparty".
        backends: Collection of backend names being built; used to toggle
            ensemble and TensorRT support.
        cmake_dir: Source directory, appended as the final argument.
        install_dir: Value for CMAKE_INSTALL_PREFIX.

    Returns:
        List of argument strings, ending with `cmake_dir`.
    """
    settings = [
        cmake_core_arg("CMAKE_BUILD_TYPE", None, FLAGS.build_type),
        cmake_core_arg("CMAKE_INSTALL_PREFIX", "PATH", install_dir),
        cmake_core_arg("TRITON_VERSION", "STRING", FLAGS.version),
        cmake_core_arg("TRITON_REPO_ORGANIZATION", "STRING", FLAGS.github_organization),
        cmake_core_arg("TRITON_COMMON_REPO_TAG", "STRING", components["common"]),
        cmake_core_arg("TRITON_CORE_REPO_TAG", "STRING", components["core"]),
        cmake_core_arg("TRITON_BACKEND_REPO_TAG", "STRING", components["backend"]),
        cmake_core_arg(
            "TRITON_THIRD_PARTY_REPO_TAG", "STRING", components["thirdparty"]
        ),
    ]

    # Feature switches taken directly from the command-line flags.
    feature_toggles = (
        ("TRITON_ENABLE_LOGGING", FLAGS.enable_logging),
        ("TRITON_ENABLE_STATS", FLAGS.enable_stats),
        ("TRITON_ENABLE_METRICS", FLAGS.enable_metrics),
        ("TRITON_ENABLE_METRICS_GPU", FLAGS.enable_gpu_metrics),
        ("TRITON_ENABLE_METRICS_CPU", FLAGS.enable_cpu_metrics),
        ("TRITON_ENABLE_TRACING", FLAGS.enable_tracing),
        ("TRITON_ENABLE_NVTX", FLAGS.enable_nvtx),
        ("TRITON_ENABLE_GPU", FLAGS.enable_gpu),
    )
    settings.extend(cmake_core_enable(name, on) for name, on in feature_toggles)

    settings.append(
        cmake_core_arg(
            "TRITON_MIN_COMPUTE_CAPABILITY", None, FLAGS.min_compute_capability
        )
    )

    # Switches derived from the selected endpoints, filesystems and backends.
    selection_toggles = (
        ("TRITON_ENABLE_MALI_GPU", FLAGS.enable_mali_gpu),
        ("TRITON_ENABLE_GRPC", "grpc" in FLAGS.endpoint),
        ("TRITON_ENABLE_HTTP", "http" in FLAGS.endpoint),
        ("TRITON_ENABLE_SAGEMAKER", "sagemaker" in FLAGS.endpoint),
        ("TRITON_ENABLE_VERTEX_AI", "vertex-ai" in FLAGS.endpoint),
        ("TRITON_ENABLE_GCS", "gcs" in FLAGS.filesystem),
        ("TRITON_ENABLE_S3", "s3" in FLAGS.filesystem),
        ("TRITON_ENABLE_AZURE_STORAGE", "azure_storage" in FLAGS.filesystem),
        ("TRITON_ENABLE_ENSEMBLE", "ensemble" in backends),
        ("TRITON_ENABLE_TENSORRT", "tensorrt" in backends),
    )
    settings.extend(cmake_core_enable(name, on) for name, on in selection_toggles)

    settings += cmake_core_extra_args()
    settings.append(cmake_dir)
    return settings
def repoagent_repo(ra):
    """Return the repository name for repo-agent `ra`: "<ra>_repository_agent"."""
    return f"{ra}_repository_agent"
def repoagent_cmake_args(images, components, ra, install_dir):
    """Assemble the cmake command-line arguments for a repo-agent build.

    Args:
        images: Not used by this function.
        components: Mapping providing the repo tags for "common" and "core".
        ra: Not used by this function.
        install_dir: Value for CMAKE_INSTALL_PREFIX.

    Returns:
        List of argument strings, ending with the ".." source directory.
    """
    settings = [
        cmake_repoagent_arg("CMAKE_BUILD_TYPE", None, FLAGS.build_type),
        cmake_repoagent_arg("CMAKE_INSTALL_PREFIX", "PATH", install_dir),
        cmake_repoagent_arg(
            "TRITON_REPO_ORGANIZATION", "STRING", FLAGS.github_organization
        ),
        cmake_repoagent_arg("TRITON_COMMON_REPO_TAG", "STRING", components["common"]),
        cmake_repoagent_arg("TRITON_CORE_REPO_TAG", "STRING", components["core"]),
        cmake_repoagent_enable("TRITON_ENABLE_GPU", FLAGS.enable_gpu),
    ]
    settings.extend(cmake_repoagent_extra_args())
    settings.append("..")
    return settings
def cache_repo(cache):
    """Return the repository name for `cache`: "<cache>_cache"."""
    # example: "local", or "redis"
    return f"{cache}_cache"
def cache_cmake_args(images, components, cache, install_dir):
    """Assemble the cmake command-line arguments for a cache build.

    Args:
        images: Not used by this function.
        components: Mapping providing the repo tags for "common" and "core".
        cache: Not used by this function.
        install_dir: Value for CMAKE_INSTALL_PREFIX.

    Returns:
        List of argument strings, ending with the ".." source directory.
    """
    settings = [
        cmake_cache_arg("CMAKE_BUILD_TYPE", None, FLAGS.build_type),
        cmake_cache_arg("CMAKE_INSTALL_PREFIX", "PATH", install_dir),
        cmake_cache_arg(
            "TRITON_REPO_ORGANIZATION", "STRING", FLAGS.github_organization
        ),
        cmake_cache_arg("TRITON_COMMON_REPO_TAG", "STRING", components["common"]),
        cmake_cache_arg("TRITON_CORE_REPO_TAG", "STRING", components["core"]),
        cmake_cache_enable("TRITON_ENABLE_GPU", FLAGS.enable_gpu),
    ]
    settings.extend(cmake_cache_extra_args())
    settings.append("..")
    return settings
def backend_repo(be):
    """Return the repository name for backend `be`: "<be>_backend"."""
    return f"{be}_backend"
def backend_cmake_args(images, components, be, install_dir, library_paths):
    """Assemble the cmake command-line arguments for building backend `be`.

    Args:
        images: Mapping of docker image overrides, forwarded to the
            backend-specific helpers that take it.
        components: Mapping providing the repo tags for "common", "core" and
            "backend".
        be: Backend name.
        install_dir: Value for CMAKE_INSTALL_PREFIX.
        library_paths: Forwarded to onnxruntime_cmake_args().

    Returns:
        List of argument strings: backend-specific settings first, then the
        common settings, ending with the source-directory argument.
    """
    cmake_build_type = FLAGS.build_type

    # Backend-specific settings; only the selected entry is evaluated.
    specific = {
        "onnxruntime": lambda: onnxruntime_cmake_args(images, library_paths),
        "openvino": lambda: openvino_cmake_args(),
        "python": lambda: python_cmake_args(),
        "dali": lambda: dali_cmake_args(),
        "pytorch": lambda: pytorch_cmake_args(images),
        "armnn_tflite": lambda: armnn_tflite_cmake_args(),
        "fil": lambda: fil_cmake_args(images),
        "fastertransformer": lambda: fastertransformer_cmake_args(),
        "tensorrt": lambda: tensorrt_cmake_args(),
        "tensorrtllm": lambda: tensorrtllm_cmake_args(images),
    }
    args = specific.get(be, lambda: [])()
    if be == "fil":
        # DLIS-4618: FIL backend fails debug build, so override it for now.
        cmake_build_type = "Release"

    cargs = args + [
        cmake_backend_arg(be, "CMAKE_BUILD_TYPE", None, cmake_build_type),
        cmake_backend_arg(be, "CMAKE_INSTALL_PREFIX", "PATH", install_dir),
        cmake_backend_arg(
            be, "TRITON_REPO_ORGANIZATION", "STRING", FLAGS.github_organization
        ),
        cmake_backend_arg(be, "TRITON_COMMON_REPO_TAG", "STRING", components["common"]),
        cmake_backend_arg(be, "TRITON_CORE_REPO_TAG", "STRING", components["core"]),
        cmake_backend_arg(
            be, "TRITON_BACKEND_REPO_TAG", "STRING", components["backend"]
        ),
    ]

    common_toggles = (
        ("TRITON_ENABLE_GPU", FLAGS.enable_gpu),
        ("TRITON_ENABLE_MALI_GPU", FLAGS.enable_mali_gpu),
        ("TRITON_ENABLE_STATS", FLAGS.enable_stats),
        ("TRITON_ENABLE_METRICS", FLAGS.enable_metrics),
    )
    cargs.extend(cmake_backend_enable(be, name, on) for name, on in common_toggles)

    # [DLIS-4950] always enable below once Windows image is updated with CUPTI
    # cargs.append(cmake_backend_enable(be, 'TRITON_ENABLE_MEMORY_TRACKER', True))
    windows_docker_build = (target_platform() == "windows") and (
        not FLAGS.no_container_build
    )
    if windows_docker_build:
        print(
            "Warning: Detected docker build is used for Windows, backend utility 'device memory tracker' will be disabled due to missing library in CUDA Windows docker image."
        )
        cargs.append(cmake_backend_enable(be, "TRITON_ENABLE_MEMORY_TRACKER", False))
    elif target_platform() == "igpu":
        print(
            "Warning: Detected iGPU build, backend utility 'device memory tracker' will be disabled as iGPU doesn't contain required version of the library."
        )
        cargs.append(cmake_backend_enable(be, "TRITON_ENABLE_MEMORY_TRACKER", False))
    elif FLAGS.enable_gpu:
        cargs.append(cmake_backend_enable(be, "TRITON_ENABLE_MEMORY_TRACKER", True))

    cargs += cmake_backend_extra_args(be)

    source_arg = (
        "-S ../triton_backend/inflight_batcher_llm -B ."
        if be == "tensorrtllm"
        else ".."
    )
    cargs.append(source_arg)
    return cargs
def python_cmake_args():
    """Return the Python-backend-specific cmake arguments.

    Only RHEL targets get a setting: PYBIND11_PYTHON_VERSION pinned to
    FLAGS.rhel_py_version. Every other platform yields an empty list.
    """
    if target_platform() != "rhel":
        return []
    return [
        cmake_backend_arg(
            "python", "PYBIND11_PYTHON_VERSION", "STRING", FLAGS.rhel_py_version
        )
    ]
def pytorch_cmake_args(images):
    """Return the PyTorch-backend-specific cmake arguments.

    Args:
        images: Mapping of docker image overrides. images["pytorch"], when
            present, is used as the PyTorch docker image; otherwise the image
            name is built from FLAGS.upstream_container_version.

    Returns:
        List of quoted "-D..." cmake setting strings.
    """
    if "pytorch" in images:
        image = images["pytorch"]
    else:
        image = "nvcr.io/nvidia/pytorch:{}-py3".format(FLAGS.upstream_container_version)
    cargs = [
        cmake_backend_arg("pytorch", "TRITON_PYTORCH_DOCKER_IMAGE", None, image),
    ]

    # TODO: TPRD-372 TorchTRT extension is not currently supported by our manylinux build
    # TODO: TPRD-373 NVTX extension is not currently supported by our manylinux build
    if target_platform() != "rhel":
        # TorchTRT is only requested for GPU builds.
        if FLAGS.enable_gpu:
            cargs.append(
                cmake_backend_enable("pytorch", "TRITON_PYTORCH_ENABLE_TORCHTRT", True)
            )
        cargs.append(
            cmake_backend_enable("pytorch", "TRITON_ENABLE_NVTX", FLAGS.enable_nvtx)
        )
    # iGPU builds explicitly switch NVSHMEM off.
    if target_platform() == "igpu":
        cargs.append(
            cmake_backend_enable("pytorch", "TRITON_PYTORCH_NVSHMEM", False)
        )
    return cargs
def onnxruntime_cmake_args(images, library_paths):
    """Return the ONNX Runtime-backend-specific cmake arguments.

    Args:
        images: Mapping of docker image overrides; images["base"], when
            present, is passed as TRITON_BUILD_CONTAINER.
        library_paths: Not used by this function.

    Returns:
        List of quoted "-D..." cmake setting strings.
    """
    # The TRITON_BUILD_ONNXRUNTIME_VERSION environment variable, when set to
    # a non-empty value, takes precedence over FLAGS.ort_version.
    cargs = [
        cmake_backend_arg(
            "onnxruntime",
            "TRITON_BUILD_ONNXRUNTIME_VERSION",
            None,
            os.getenv("TRITON_BUILD_ONNXRUNTIME_VERSION")
            if os.getenv("TRITON_BUILD_ONNXRUNTIME_VERSION")
            else FLAGS.ort_version,
        )
    ]

    # TRITON_ENABLE_GPU is already set for all backends in backend_cmake_args()
    if FLAGS.enable_gpu:
        # TODO: TPRD-712 TensorRT is not currently supported by our RHEL build for SBSA.
        if target_platform() != "rhel" or (
            target_platform() == "rhel" and target_machine() == "x86_64"
        ):
            cargs.append(
                cmake_backend_enable(
                    "onnxruntime", "TRITON_ENABLE_ONNXRUNTIME_TENSORRT", True
                )
            )

    if target_platform() == "windows":
        # Windows only forwards an explicitly supplied base image.
        if "base" in images:
            cargs.append(
                cmake_backend_arg(
                    "onnxruntime", "TRITON_BUILD_CONTAINER", None, images["base"]
                )
            )
    else:
        # Without an explicit base image, pass the upstream container version.
        if "base" in images:
            cargs.append(
                cmake_backend_arg(
                    "onnxruntime", "TRITON_BUILD_CONTAINER", None, images["base"]
                )
            )
        else:
            cargs.append(
                cmake_backend_arg(
                    "onnxruntime",
                    "TRITON_BUILD_CONTAINER_VERSION",
                    None,
                    FLAGS.upstream_container_version,
                )
            )

        # TODO: TPRD-333 OpenVino extension is not currently supported by our manylinux build
        if (
            (target_machine() != "aarch64")
            and (target_platform() != "rhel")
            and (FLAGS.ort_openvino_version is not None)
        ):
            cargs.append(
                cmake_backend_enable(
                    "onnxruntime", "TRITON_ENABLE_ONNXRUNTIME_OPENVINO", True
                )
            )
            cargs.append(
                cmake_backend_arg(
                    "onnxruntime",
                    "TRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION",
                    None,
                    FLAGS.ort_openvino_version,
                )
            )

        if (target_platform() == "igpu") or (target_platform() == "rhel"):
            cargs.append(
                cmake_backend_arg(
                    "onnxruntime",
                    "TRITON_BUILD_TARGET_PLATFORM",
                    None,
                    target_platform(),
                )
            )

    return cargs
def openvino_cmake_args():
    """Return the OpenVINO-backend-specific cmake arguments.

    Returns:
        List of quoted "-D..." cmake setting strings, starting with
        TRITON_BUILD_OPENVINO_VERSION taken from
        FLAGS.standalone_openvino_version.
    """
    cargs = [
        cmake_backend_arg(
            "openvino",
            "TRITON_BUILD_OPENVINO_VERSION",
            None,
            FLAGS.standalone_openvino_version,
        )
    ]
    # NOTE(review): 'images' is not a parameter of this function, so it must
    # resolve to a module-level name defined elsewhere -- confirm; otherwise
    # the lookups below raise NameError.
    if target_platform() == "windows":
        # Windows only forwards an explicitly supplied base image.
        if "base" in images:
            cargs.append(
                cmake_backend_arg(
                    "openvino", "TRITON_BUILD_CONTAINER", None, images["base"]
                )
            )
    else:
        # Without an explicit base image, pass the upstream container version.
        if "base" in images:
            cargs.append(
                cmake_backend_arg(
                    "openvino", "TRITON_BUILD_CONTAINER", None, images["base"]
                )
            )
        else:
            cargs.append(
                cmake_backend_arg(
                    "openvino",
                    "TRITON_BUILD_CONTAINER_VERSION",
                    None,
                    FLAGS.upstream_container_version,
                )
            )
    return cargs
def tensorrt_cmake_args():
    """Return the TensorRT-backend-specific cmake arguments.

    NVTX follows FLAGS.enable_nvtx; on Windows the TensorRT include path is
    additionally set to c:/TensorRT/include.
    """
    settings = [
        cmake_backend_enable("tensorrt", "TRITON_ENABLE_NVTX", FLAGS.enable_nvtx),
    ]
    if target_platform() != "windows":
        return settings
    settings.append(
        cmake_backend_arg(
            "tensorrt", "TRITON_TENSORRT_INCLUDE_PATHS", None, "c:/TensorRT/include"
        )
    )
    return settings
def dali_cmake_args():
    """Return the DALI-backend-specific cmake arguments (TRITON_DALI_SKIP_DOWNLOAD off)."""
    skip_download = cmake_backend_enable("dali", "TRITON_DALI_SKIP_DOWNLOAD", False)
    return [skip_download]
def fil_cmake_args(images):
    """Return the FIL-backend-specific cmake arguments.

    Args:
        images: Mapping of docker image overrides; images["base"], when
            present, is passed as TRITON_BUILD_CONTAINER, otherwise
            FLAGS.upstream_container_version is passed as
            TRITON_BUILD_CONTAINER_VERSION.

    Returns:
        Two quoted "-D..." settings: the docker-build switch and the
        container selection.
    """
    docker_build = cmake_backend_enable("fil", "TRITON_FIL_DOCKER_BUILD", True)
    if "base" in images:
        container = cmake_backend_arg(
            "fil", "TRITON_BUILD_CONTAINER", None, images["base"]
        )
    else:
        container = cmake_backend_arg(
            "fil",
            "TRITON_BUILD_CONTAINER_VERSION",
            None,
            FLAGS.upstream_container_version,
        )
    return [docker_build, container]
def armnn_tflite_cmake_args():
    """Return the ArmNN TFLite cmake arguments: JOBS set to the host CPU count."""
    job_count = multiprocessing.cpu_count()
    return [cmake_backend_arg("armnn_tflite", "JOBS", None, job_count)]
def fastertransformer_cmake_args():
    """Return the FasterTransformer cmake arguments, warning that it is unsupported.

    The warning is printed to stdout on every call.
    """
    print("Warning: FasterTransformer backend is not officially supported.")
    export_compile_commands = cmake_backend_arg(
        "fastertransformer", "CMAKE_EXPORT_COMPILE_COMMANDS", None, 1
    )
    fp8_setting = cmake_backend_arg("fastertransformer", "ENABLE_FP8", None, "OFF")
    return [export_compile_commands, fp8_setting]
def tensorrtllm_cmake_args(images):
    """Return the TensorRT-LLM cmake arguments (USE_CXX11_ABI on).

    Args:
        images: Not used by this function.
    """
    return [cmake_backend_enable("tensorrtllm", "USE_CXX11_ABI", True)]
def install_dcgm_libraries(dcgm_version, target_machine):
    """Return the Dockerfile fragment that installs DCGM.

    Args:
        dcgm_version: DCGM version to install; an empty string is reported
            as a build failure.
        target_machine: Machine type; "aarch64" selects the sbsa package
            repository, anything else the x86_64 one.

    Returns:
        Dockerfile text using dnf when target_platform() is "rhel" and apt
        otherwise.
    """
    if dcgm_version == "":
        fail(
            "unable to determine default repo-tag, DCGM version not known for {}".format(
                FLAGS.version
            )
        )
        return ""

    on_rhel = target_platform() == "rhel"
    on_arm = target_machine == "aarch64"

    # RHEL has the same install instructions for both aarch64 and x86
    if on_rhel and on_arm:
        template = """
ENV DCGM_VERSION {}
# Install DCGM. Steps from https://developer.nvidia.com/dcgm#Downloads
RUN dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo \\
&& dnf clean expire-cache \\
&& dnf makecache --refresh \\
&& dnf install --assumeyes \\
datacenter-gpu-manager-4-core-1:{} \\
datacenter-gpu-manager-4-devel-1:{}
"""
    elif on_rhel:
        template = """
ENV DCGM_VERSION {}
# Install DCGM. Steps from https://developer.nvidia.com/dcgm#Downloads
RUN dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo \\
&& dnf clean expire-cache \\
&& dnf makecache --refresh \\
&& dnf install --assumeyes \\
datacenter-gpu-manager-4-core-1:{} \\
datacenter-gpu-manager-4-devel-1:{}
"""
    elif on_arm:
        template = """
ENV DCGM_VERSION {}
# Install DCGM. Steps from https://developer.nvidia.com/dcgm#Downloads
RUN curl -o /tmp/cuda-keyring.deb \\
https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/sbsa/cuda-keyring_1.1-1_all.deb \\
&& apt install /tmp/cuda-keyring.deb \\
&& rm /tmp/cuda-keyring.deb \\
&& apt update -qq \\
&& apt install --yes --no-install-recommends \\
datacenter-gpu-manager-4-core=1:{} \\
datacenter-gpu-manager-4-dev=1:{}
"""
    else:
        template = """
ENV DCGM_VERSION {}
# Install DCGM. Steps from https://developer.nvidia.com/dcgm#Downloads
RUN curl -o /tmp/cuda-keyring.deb \\
https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb \\
&& apt install /tmp/cuda-keyring.deb \\
&& rm /tmp/cuda-keyring.deb \\
&& apt update -qq \\
&& apt install --yes --no-install-recommends \\
datacenter-gpu-manager-4-core=1:{} \\
datacenter-gpu-manager-4-dev=1:{}
"""

    # The version appears three times: the ENV line and both package pins.
    return template.format(dcgm_version, dcgm_version, dcgm_version)
def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap):
    """Write the build-base Dockerfile for RHEL targets.

    Args:
        ddir: Directory in which the Dockerfile is created.
        dockerfile_name: File name of the Dockerfile inside `ddir`.
        argmap: Mapping providing "TRITON_VERSION",
            "TRITON_CONTAINER_VERSION" and "BASE_IMAGE", plus "DCGM_VERSION"
            when FLAGS.enable_gpu is set.
    """
    # Build arguments, declared before FROM so BASE_IMAGE can select the image.
    df = """
ARG TRITON_VERSION={}
ARG TRITON_CONTAINER_VERSION={}
ARG BASE_IMAGE={}
""".format(
        argmap["TRITON_VERSION"],
        argmap["TRITON_CONTAINER_VERSION"],
        argmap["BASE_IMAGE"],
    )

    df += """
FROM ${BASE_IMAGE}
ARG TRITON_VERSION
ARG TRITON_CONTAINER_VERSION
ENV PIP_BREAK_SYSTEM_PACKAGES=1 CMAKE_POLICY_VERSION_MINIMUM=3.5
"""
    # Docker tooling and the system packages needed by the build.
    df += """
# Install docker docker buildx
RUN yum install -y ca-certificates curl gnupg yum-utils \\
&& yum-config-manager --add-repo https://download.docker.com/linux/rhel/docker-ce.repo \\
&& yum install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
# && yum install -y docker.io docker-buildx-plugin
# libcurl4-openSSL-dev is needed for GCS
# python3-dev is needed by Torchvision
# python3-pip and libarchive-dev is needed by python backend
# libxml2-dev is needed for Azure Storage
# scons is needed for armnn_tflite backend build dep
RUN yum install -y \\
autoconf \\
automake \\
bzip2-devel \\
ca-certificates \\
git \\
gperf \\
gperftools-devel \\
libarchive-devel \\
libb64-devel \\
libcurl-devel \\
libtool \\
libxml2-devel \\
ncurses-devel \\
numactl-devel \\
openssl-devel \\
pkg-config \\
python3-pip \\
python3-scons \\
python3-setuptools \\
rapidjson-devel \\
re2-devel \\
readline-devel \\
unzip \\
wget \\
xz-devel \\
zlib-devel
"""
    # Remote ccache is set up only when both environment variables are
    # present on the host; ccache is built from source here.
    # NOTE(review): the download uses 'curl -k', which skips TLS certificate
    # verification -- confirm this is intentional.
    if os.getenv("CCACHE_REMOTE_ONLY") and os.getenv("CCACHE_REMOTE_STORAGE"):
        df += """
RUN curl -k -s -L https://github.com/ccache/ccache/archive/refs/tags/v4.10.2.tar.gz -o /tmp/ccache.tar.gz \\
&& tar -xzf /tmp/ccache.tar.gz -C /tmp \\
&& cmake -D CMAKE_BUILD_TYPE=Release -S /tmp/ccache-4.10.2 -B /tmp/build \\
&& cmake --build /tmp/build -j$(nproc) -t install \\
&& rm -rf /tmp/ccache.tar.gz /tmp/ccache-4.10.2 /tmp/build
ENV CCACHE_REMOTE_ONLY="true" \\
CCACHE_REMOTE_STORAGE="{}" \\
CMAKE_CXX_COMPILER_LAUNCHER="ccache" \\
CMAKE_C_COMPILER_LAUNCHER="ccache" \\
CMAKE_CUDA_COMPILER_LAUNCHER="ccache"
RUN ccache -p
""".format(
            os.getenv("CCACHE_REMOTE_STORAGE")
        )
    # Requires openssl-devel to be installed first for pyenv build to be successful
    df += change_default_python_version_rhel(FLAGS.rhel_py_version)
    df += """
RUN pip3 install --upgrade pip \\
&& pip3 install --upgrade \\
build \\
wheel \\
setuptools \\
docker \\
virtualenv \\
patchelf==0.17.2 \\
cmake==4.0.3
"""
    # NOTE(review): the extracted directory name 'boost_1_80_0' is hard-coded
    # while the archive comes from FLAGS.boost_url -- confirm they stay in sync.
    df += f"""
# Install boost version >= 1.78 for boost::span
# Current libboost-dev apt packages are < 1.78, so install from tar.gz
RUN wget -O /tmp/boost.tar.gz {FLAGS.boost_url} \\
&& sha256sum /tmp/boost.tar.gz | grep {FLAGS.boost_sha256} \\
&& (cd /tmp && tar xzf boost.tar.gz) \\
&& mv /tmp/boost_1_80_0/boost /usr/include/boost
"""
    if FLAGS.enable_gpu:
        df += install_dcgm_libraries(argmap["DCGM_VERSION"], target_machine())
    df += """
ENV TRITON_SERVER_VERSION ${TRITON_VERSION}
ENV NVIDIA_TRITON_SERVER_VERSION ${TRITON_CONTAINER_VERSION}
"""
    # Copy in the build context after clearing whatever /workspace contains.
    df += """
WORKDIR /workspace
RUN rm -fr *
COPY . .
ENTRYPOINT []
"""
    with open(os.path.join(ddir, dockerfile_name), "w") as dfile:
        dfile.write(df)
def create_dockerfile_buildbase(ddir, dockerfile_name, argmap):
    """Write the build-base Dockerfile for Windows or apt-based Linux targets.

    Args:
        ddir: Directory in which the Dockerfile is created.
        dockerfile_name: File name of the Dockerfile inside `ddir`.
        argmap: Mapping providing "TRITON_VERSION",
            "TRITON_CONTAINER_VERSION" and "BASE_IMAGE", plus "DCGM_VERSION"
            when FLAGS.enable_gpu is set on a non-Windows target.
    """
    # Build arguments, declared before FROM so BASE_IMAGE can select the image.
    df = """
ARG TRITON_VERSION={}
ARG TRITON_CONTAINER_VERSION={}
ARG BASE_IMAGE={}
""".format(
        argmap["TRITON_VERSION"],
        argmap["TRITON_CONTAINER_VERSION"],
        argmap["BASE_IMAGE"],
    )

    df += """
FROM ${BASE_IMAGE}
ARG TRITON_VERSION
ARG TRITON_CONTAINER_VERSION
ENV PIP_BREAK_SYSTEM_PACKAGES=1 CMAKE_POLICY_VERSION_MINIMUM=3.5
"""
    # Install the windows- or linux-specific buildbase dependencies
    if target_platform() == "windows":
        df += """
RUN python3 -m pip install build
SHELL ["cmd", "/S", "/C"]
"""
    else:
        df += """
# Ensure apt-get won't prompt for selecting options
ENV DEBIAN_FRONTEND=noninteractive
# Install docker docker buildx
RUN apt-get update \\
&& apt-get install -y ca-certificates curl gnupg \\
&& install -m 0755 -d /etc/apt/keyrings \\
&& curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg \\
&& chmod a+r /etc/apt/keyrings/docker.gpg \\
&& echo \\
"deb [arch="$(dpkg --print-architecture)" signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \\
"$(. /etc/os-release && echo "$VERSION_CODENAME")" stable" | \\
tee /etc/apt/sources.list.d/docker.list > /dev/null \\
&& apt-get update \\
&& apt-get install -y docker.io docker-buildx-plugin
# libcurl4-openSSL-dev is needed for GCS
# python3-dev is needed by Torchvision
# python3-pip and libarchive-dev is needed by python backend
# libxml2-dev is needed for Azure Storage
# scons is needed for armnn_tflite backend build dep
RUN apt-get update \\
&& apt-get install -y --no-install-recommends \\
ca-certificates \\
autoconf \\
automake \\
build-essential \\
git \\
gperf \\
libre2-dev \\
libssl-dev \\
libtool \\
libcurl4-openssl-dev \\
libb64-dev \\
libgoogle-perftools-dev \\
python3-dev \\
python3-pip \\
python3-wheel \\
python3-setuptools \\
rapidjson-dev \\
scons \\
software-properties-common \\
pkg-config \\
unzip \\
wget \\
zlib1g-dev \\
libarchive-dev \\
libxml2-dev \\
libnuma-dev \\
wget \\
&& rm -rf /var/lib/apt/lists/*
RUN pip3 install --upgrade \\
build \\
docker \\
virtualenv \\
patchelf==0.17.2 \\
cmake==4.0.3 \\
pybind11[global]
"""
        # NOTE(review): the extracted directory name 'boost_1_80_0' is
        # hard-coded while the archive comes from FLAGS.boost_url -- confirm
        # they stay in sync.
        df += f"""
# Install boost version >= 1.78 for boost::span
# Current libboost-dev apt packages are < 1.78, so install from tar.gz
RUN wget -O /tmp/boost.tar.gz {FLAGS.boost_url} \\
&& sha256sum /tmp/boost.tar.gz | grep {FLAGS.boost_sha256} \\
&& (cd /tmp && tar xzf boost.tar.gz) \\
&& mv /tmp/boost_1_80_0/boost /usr/include/boost
"""
        if FLAGS.enable_gpu:
            df += install_dcgm_libraries(argmap["DCGM_VERSION"], target_machine())

    df += """
ENV TRITON_SERVER_VERSION ${TRITON_VERSION}
ENV NVIDIA_TRITON_SERVER_VERSION ${TRITON_CONTAINER_VERSION}
"""
    # Remote ccache is set up only when both environment variables are
    # present on the host; the package is installed with apt-get.
    if os.getenv("CCACHE_REMOTE_ONLY") and os.getenv("CCACHE_REMOTE_STORAGE"):
        df += """
ENV CCACHE_REMOTE_ONLY="true" \\
CCACHE_REMOTE_STORAGE="{}" \\
CMAKE_CXX_COMPILER_LAUNCHER="ccache" \\
CMAKE_C_COMPILER_LAUNCHER="ccache" \\
CMAKE_CUDA_COMPILER_LAUNCHER="ccache"
RUN apt-get update \\
&& apt-get install -y --no-install-recommends ccache && ccache -p \\
&& rm -rf /var/lib/apt/lists/*
""".format(
            os.getenv("CCACHE_REMOTE_STORAGE")
        )
    # Copy in the triton source. We remove existing contents first in
    # case the FROM container has something there already.
    if target_platform() == "windows":
        df += """
WORKDIR /workspace
RUN rmdir /S/Q * || exit 0
COPY . .
"""
    else:
        df += """
WORKDIR /workspace
RUN rm -fr *
COPY . .
ENTRYPOINT []
"""
    with open(os.path.join(ddir, dockerfile_name), "w") as dfile:
        dfile.write(df)
def create_dockerfile_cibase(ddir, dockerfile_name, argmap):
    """Write the CI-base Dockerfile.

    Args:
        ddir: Directory in which the Dockerfile is created.
        dockerfile_name: File name of the Dockerfile inside `ddir`.
        argmap: Mapping providing "TRITON_VERSION",
            "TRITON_CONTAINER_VERSION" and "BASE_IMAGE".
    """
    # Build arguments, declared before FROM so BASE_IMAGE can select the image.
    arg_section = """
ARG TRITON_VERSION={}
ARG TRITON_CONTAINER_VERSION={}
ARG BASE_IMAGE={}
""".format(
        argmap["TRITON_VERSION"],
        argmap["TRITON_CONTAINER_VERSION"],
        argmap["BASE_IMAGE"],
    )

    stage_section = """
FROM ${BASE_IMAGE}
ARG TRITON_VERSION
ARG TRITON_CONTAINER_VERSION
COPY build/ci /workspace
WORKDIR /workspace
ENV TRITON_SERVER_VERSION ${TRITON_VERSION}
ENV NVIDIA_TRITON_SERVER_VERSION ${TRITON_CONTAINER_VERSION}
ENV PIP_BREAK_SYSTEM_PACKAGES=1
"""

    target_path = os.path.join(ddir, dockerfile_name)
    with open(target_path, "w") as dfile:
        dfile.write(arg_section + stage_section)
def create_dockerfile_linux(
    ddir, dockerfile_name, argmap, backends, repoagents, caches, endpoints
):
    """Generate the Dockerfile for the final Linux tritonserver image.

    The Dockerfile text is collected as a list of fragments and written to
    ``os.path.join(ddir, dockerfile_name)``.

    Args:
        ddir: Directory that receives the Dockerfile.
        dockerfile_name: File name of the Dockerfile inside ``ddir``.
        argmap: Build-argument mapping. ``TRITON_VERSION``,
            ``TRITON_CONTAINER_VERSION``, ``INFERENCE_IMAGE`` and ``BASE_IMAGE``
            are read here; ``GPU_BASE_IMAGE`` is read for a CPU-only build that
            includes the pytorch backend. When "vllm" is in ``backends`` and
            ``INFERENCE_IMAGE`` is None, that entry is overwritten in place with
            a default vLLM image name.
        backends: Container of backend names (tested with ``in`` and forwarded
            to dockerfile_prepare_container_linux).
        repoagents: Not referenced in this function.
        caches: Not referenced in this function.
        endpoints: Container of endpoint names (tested with ``in``).
    """
    fragments = [
        """
ARG TRITON_VERSION={}
ARG TRITON_CONTAINER_VERSION={}
""".format(
            argmap["TRITON_VERSION"],
            argmap["TRITON_CONTAINER_VERSION"],
        )
    ]

    # Default the inference image for vLLM builds when none was supplied.
    if "vllm" in backends and argmap["INFERENCE_IMAGE"] is None:
        argmap["INFERENCE_IMAGE"] = "nvcr.io/nvidia/vllm:{}-py3".format(
            FLAGS.upstream_container_version
        )

    # An explicit inference image takes precedence over the base image.
    from_image = argmap["INFERENCE_IMAGE"]
    if from_image is None:
        from_image = argmap["BASE_IMAGE"]
    fragments.append(
        """ARG BASE_IMAGE={}
""".format(
            from_image
        )
    )

    # PyTorch backends need extra CUDA and other dependencies at runtime that
    # are missing in the CPU-only base container; they are copied from the
    # Triton Min image, which is declared here as an extra stage.
    cpu_only_pytorch = (not FLAGS.enable_gpu) and ("pytorch" in backends)
    if cpu_only_pytorch:
        fragments.append(
            """
############################################################################
## Triton Min image
############################################################################
FROM {} AS min_container
""".format(
                argmap["GPU_BASE_IMAGE"]
            )
        )

    fragments.append(
        """
############################################################################
## Production stage: Create container with just inference server executable
############################################################################
FROM ${BASE_IMAGE}
ENV PIP_BREAK_SYSTEM_PACKAGES=1
"""
    )

    fragments.append(
        dockerfile_prepare_container_linux(
            argmap, backends, FLAGS.enable_gpu, target_machine()
        )
    )

    # Copy the install tree and pip-install the bundled wheels with the
    # configured optional-dependency group.
    fragments.append(
        """
WORKDIR /opt
COPY --chown=1000:1000 build/install tritonserver
WORKDIR /opt/tritonserver
COPY --chown=1000:1000 NVIDIA_Deep_Learning_Container_License.pdf .
RUN find /opt/tritonserver/python -maxdepth 1 -type f -name \\
"tritonserver-*.whl" | xargs -I {{}} pip install --upgrade {{}}[{group}] && \\
find /opt/tritonserver/python -maxdepth 1 -type f -name \\
"tritonfrontend-*.whl" | xargs -I {{}} pip install --upgrade {{}}[{group}]
RUN pip3 install -r python/openai/requirements.txt
""".format(
            group=FLAGS.triton_wheels_dependencies_group
        )
    )

    # Feature labels for the SageMaker endpoint (core build only).
    if (not FLAGS.no_core_build) and ("sagemaker" in endpoints):
        fragments.append(
            """
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
LABEL com.amazonaws.sagemaker.capabilities.multi-models=true
COPY --chown=1000:1000 docker/sagemaker/serve /usr/bin/.
"""
        )

    # Required since libcublasLt.so is not present during the build stage of
    # the PyTorch backend.
    if cpu_only_pytorch:
        fragments.append(
            """
RUN patchelf --add-needed /usr/local/cuda/lib64/stubs/libcublasLt.so.13 backends/pytorch/libtorch_cuda.so
"""
        )

    if "tensorrtllm" in backends:
        fragments.append(
            """
RUN ldconfig && \\
find /opt/tritonserver -name lib*so -exec dirname {} \\; > /etc/ld.so.conf.d/tritonserver.conf && \\
ldconfig
"""
        )

    with open(os.path.join(ddir, dockerfile_name), "w") as out:
        out.write("".join(fragments))
def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_machine):
# Build and return (as a string) the Dockerfile text that prepares the runtime
# container: environment variables, the non-root user, OS packages,
# backend-specific additions, entrypoint files and build labels.
#
# Parameters:
#   argmap         - mapping read for "DCGM_VERSION" (only when enable_gpu is
#                    truthy), "NVIDIA_BUILD_ID" and "NVIDIA_BUILD_REF".
#   backends       - tested with `in` for backend names; also indexed with
#                    "fastertransformer" to pick the ref used in the download URL.
#   enable_gpu     - truthy selects the DCGM / CUDA-compat sections, falsy
#                    selects the CPU library section and the cpu_only entrypoint.
#   target_machine - compared with "aarch64" and forwarded to helper functions.
#
# Returns: the assembled Dockerfile fragment.
gpu_enabled = 1 if enable_gpu else 0
# Common steps to produce docker images shared by build.py and compose.py.
# Sets environment variables, installs dependencies and adds entrypoint
df = """
ARG TRITON_VERSION
ARG TRITON_CONTAINER_VERSION
ENV TRITON_SERVER_VERSION ${TRITON_VERSION}
ENV NVIDIA_TRITON_SERVER_VERSION ${TRITON_CONTAINER_VERSION}
LABEL com.nvidia.tritonserver.version="${TRITON_SERVER_VERSION}"
ENV PATH /opt/tritonserver/bin:${PATH}
# Remove once https://github.com/openucx/ucx/pull/9148 is available
# in the min container.
ENV UCX_MEM_EVENTS no
"""
# Necessary for libtorch.so to find correct HPCX libraries
if "pytorch" in backends:
df += """
ENV LD_LIBRARY_PATH /opt/hpcx/ucc/lib/:/opt/hpcx/ucx/lib/:${LD_LIBRARY_PATH}
"""
# Space-separated list of extra packages; it is substituted into the apt-get
# install list further down (the yum package list does not reference it).
backend_dependencies = ""
# libgomp1 is needed by both onnxruntime and pytorch backends
if ("onnxruntime" in backends) or ("pytorch" in backends):
backend_dependencies = "libgomp1"
# libgfortran5 is needed by pytorch backend on ARM
if ("pytorch" in backends) and (target_machine == "aarch64"):
backend_dependencies += " libgfortran5"
# openssh-server is needed for fastertransformer
if "fastertransformer" in backends:
backend_dependencies += " openssh-server"
# Environment defaults plus creation/verification of the triton-server user;
# the RUN step fails unless that user ends up with uid and gid 1000.
df += """
ENV TF_ADJUST_HUE_FUSED 1
ENV TF_ADJUST_SATURATION_FUSED 1
ENV TF_ENABLE_WINOGRAD_NONFUSED 1
ENV TF_AUTOTUNE_THRESHOLD 2
ENV TRITON_SERVER_GPU_ENABLED {gpu_enabled}
# Create a user that can be used to run triton as
# non-root. Make sure that this user to given ID 1000. All server
# artifacts copied below are assign to this user.
ENV TRITON_SERVER_USER=triton-server
RUN userdel tensorrt-server > /dev/null 2>&1 || true \\
&& userdel ubuntu > /dev/null 2>&1 || true \\
&& if ! id -u $TRITON_SERVER_USER > /dev/null 2>&1 ; then \\
useradd $TRITON_SERVER_USER; \\
fi \\
&& [ `id -u $TRITON_SERVER_USER` -eq 1000 ] \\
&& [ `id -g $TRITON_SERVER_USER` -eq 1000 ]
""".format(
gpu_enabled=gpu_enabled
)
# Common OS packages: yum on RHEL, apt-get otherwise.
if target_platform() == "rhel":
df += """
# Common dependencies.
RUN yum install -y \\
git \\
gperf \\
re2-devel \\
openssl-devel \\
libtool \\
libcurl-devel \\
libb64-devel \\
gperftools-devel \\
wget \\
python3.12-pip \\
numactl-devel
RUN pip3 install patchelf==0.17.2
"""
else:
df += """
# Ensure apt-get won't prompt for selecting options
ENV DEBIAN_FRONTEND=noninteractive
# Common dependencies. FIXME (can any of these be conditional? For
# example libcurl only needed for GCS?)
RUN apt-get update \\
&& apt-get install -y --no-install-recommends \\
clang \\
curl \\
dirmngr \\
git \\
gperf \\
libb64-0d \\
libcurl4-openssl-dev \\
libgoogle-perftools-dev \\
libjemalloc-dev \\
libnuma-dev \\
wget \\
{backend_dependencies} \\
python3-pip \\
&& rm -rf /var/lib/apt/lists/*
""".format(
backend_dependencies=backend_dependencies
)
df += """
# Set TCMALLOC_RELEASE_RATE for users setting LD_PRELOAD with tcmalloc
ENV TCMALLOC_RELEASE_RATE 200
"""
# The fastertransformer backend ships its own Dockerfile post-build steps in a
# script that is downloaded from GitHub at the ref stored in backends[be].
if "fastertransformer" in backends:
be = "fastertransformer"
url = "https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/{}/docker/create_dockerfile_and_build.py".format(
backends[be]
)
# NOTE(security): the downloaded script is executed with exec() below. The HTTP
# status is not checked and no timeout is passed, so an error page would be
# handed to exec() as if it were Python source. Only use trusted refs.
response = requests.get(url)
spec = importlib.util.spec_from_loader(
"fastertransformer_buildscript", loader=None, origin=url
)
fastertransformer_buildscript = importlib.util.module_from_spec(spec)
exec(response.content, fastertransformer_buildscript.__dict__)
df += fastertransformer_buildscript.create_postbuild(is_multistage_build=False)
# GPU builds install DCGM; otherwise the CPU-only library section is added.
if enable_gpu:
df += install_dcgm_libraries(argmap["DCGM_VERSION"], target_machine)
# This segment will break the RHEL SBSA build. Need to determine whether
# this is necessary to incorporate.
if target_platform() != "rhel":
df += """
# Extra defensive wiring for CUDA Compat lib
RUN ln -sf ${_CUDA_COMPAT_PATH}/lib.real ${_CUDA_COMPAT_PATH}/lib \\
&& echo ${_CUDA_COMPAT_PATH}/lib > /etc/ld.so.conf.d/00-cuda-compat.conf \\
&& ldconfig \\
&& rm -f ${_CUDA_COMPAT_PATH}/lib
"""
else:
df += add_cpu_libs_to_linux_dockerfile(backends, target_machine)
# Add dependencies needed for python backend
if "python" in backends:
if target_platform() == "rhel":
df += """
# python3, python3-pip and some pip installs required for the python backend
RUN yum install -y \\
libarchive-devel \\
openssl-devel \\
readline-devel
"""
# Requires openssl-devel to be installed first for pyenv build to be successful
df += change_default_python_version_rhel(FLAGS.rhel_py_version)
df += """
RUN pip3 install --upgrade pip \\
&& pip3 install --upgrade \\
wheel \\
setuptools \\
\"numpy<2\" \\
virtualenv
"""
else:
df += """
# python3, python3-pip and some pip installs required for the python backend
RUN apt-get update \\
&& apt-get install -y --no-install-recommends \\
python3 \\
libarchive-dev \\
python3-pip \\
python3-wheel \\
python3-setuptools \\
libpython3-dev \\
&& pip3 install --upgrade \\
\"numpy<2\" \\
virtualenv \\
&& rm -rf /var/lib/apt/lists/*
"""
# CUDA tool and header locations exported as TRITON_* variables for the
# tensorrtllm and vllm backends.
if "tensorrtllm" in backends or "vllm" in backends:
df += """
ENV TRITON_CUDACRT_PATH=/usr/local/cuda/include \\
TRITON_CUDART_PATH=/usr/local/cuda/include \\
TRITON_CUOBJDUMP_PATH=/usr/local/cuda/bin/cuobjdump \\
TRITON_CUPTI_PATH=/usr/local/cuda/include \\
TRITON_NVDISASM_PATH=/usr/local/cuda/bin/nvdisasm \\
TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas
"""
if "dali" in backends:
df += """
# Update Python path to include DALI
ENV PYTHONPATH=/opt/tritonserver/backends/dali/wheel/dali:$PYTHONPATH
"""
# RHEL: add the NVIDIA CUDA repo for the target architecture, install
# libnvshmem3-cuda-13 and register the CUDA library directories with ldconfig.
if target_platform() == "rhel":
repo_arch = "sbsa" if target_machine == "aarch64" else "x86_64"
df += """
RUN dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/{repo_arch}/cuda-rhel8.repo \\
&& dnf clean expire-cache \\
&& dnf install --assumeyes libnvshmem3-cuda-13
RUN dirname $(find /usr -name "libcudart*.so" -o -name "libnvinf*.so" -o -name "libnvshm*" -type f) | sort -u > /etc/ld.so.conf.d/triton-cuda-libs.conf && ldconfig
""".format(
repo_arch=repo_arch
)
# Start from an empty /opt/tritonserver and add the entrypoint.d scripts.
df += """
WORKDIR /opt/tritonserver
RUN rm -fr /opt/tritonserver/*
ENV NVIDIA_PRODUCT_NAME="Triton Server"
COPY docker/entrypoint.d/ /opt/nvidia/entrypoint.d/
"""
# The CPU-only build uses ubuntu as the base image, and so the
# entrypoint files are not available in /opt/nvidia in the base
# image, so we must provide them ourselves.
if not enable_gpu:
df += """
COPY docker/cpu_only/ /opt/nvidia/
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
"""
# Record the build id (as both ENV and LABEL) and the build ref (LABEL only).
df += """
ENV NVIDIA_BUILD_ID {}
LABEL com.nvidia.build.id={}
LABEL com.nvidia.build.ref={}
""".format(
argmap["NVIDIA_BUILD_ID"], argmap["NVIDIA_BUILD_ID"], argmap["NVIDIA_BUILD_REF"]
)
return df
def add_cpu_libs_to_linux_dockerfile(backends, target_machine):
    """Return Dockerfile text that copies GPU libraries into a CPU-only image.

    Only the pytorch backend needs these libraries; for any other backend
    selection an empty string is returned.

    Args:
        backends: Container of backend names (tested with ``in``).
        target_machine: Machine name; "aarch64" selects the ARM directory
            names, any other value selects the x86_64 ones.

    Returns:
        The Dockerfile fragment, or "" when "pytorch" is not in ``backends``.
    """
    if "pytorch" not in backends:
        return ""

    on_arm = target_machine == "aarch64"
    names = {
        "cuda_arch": "sbsa" if on_arm else "x86_64",
        "libs_arch": "aarch64" if on_arm else "x86_64",
    }

    # Even though the build is CPU-only, the pytorch version in use depends on
    # libraries such as cuda and cudnn. They are absent from the ubuntu base
    # image, so they are copied from the Triton min container.
    cuda_section = """
RUN mkdir -p /usr/local/cuda/lib64/stubs
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcusparse.so /usr/local/cuda/lib64/stubs/libcusparse.so.12
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcusolver.so /usr/local/cuda/lib64/stubs/libcusolver.so.12
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcurand.so /usr/local/cuda/lib64/stubs/libcurand.so.10
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcufft.so /usr/local/cuda/lib64/stubs/libcufft.so.12
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublas.so /usr/local/cuda/lib64/stubs/libcublas.so.13
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublasLt.so /usr/local/cuda/lib64/stubs/libcublasLt.so.13
RUN mkdir -p /usr/local/cuda/targets/{cuda_arch}-linux/lib
COPY --from=min_container /usr/local/cuda/lib64/libcudart.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
COPY --from=min_container /usr/local/cuda/lib64/libcupti.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
COPY --from=min_container /usr/local/cuda/lib64/libnvJitLink.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
COPY --from=min_container /usr/local/cuda/lib64/libcufile.so.0 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
COPY --from=min_container /usr/local/cuda/lib64/libnvrtc.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
COPY --from=min_container /usr/local/cuda/lib64/libcusparseLt.so.0 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
COPY --from=min_container /usr/local/cuda/lib64/libnvshmem_host.so.3 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
RUN mkdir -p /opt/hpcx/ucc/lib/ /opt/hpcx/ucx/lib/
COPY --from=min_container /opt/hpcx/ucc/lib/libucc.so.1 /opt/hpcx/ucc/lib/libucc.so.1
COPY --from=min_container /opt/hpcx/ucx/lib/libucm.so.0 /opt/hpcx/ucx/lib/libucm.so.0
COPY --from=min_container /opt/hpcx/ucx/lib/libucp.so.0 /opt/hpcx/ucx/lib/libucp.so.0
COPY --from=min_container /opt/hpcx/ucx/lib/libucs.so.0 /opt/hpcx/ucx/lib/libucs.so.0
COPY --from=min_container /opt/hpcx/ucx/lib/libuct.so.0 /opt/hpcx/ucx/lib/libuct.so.0
COPY --from=min_container /usr/lib/{libs_arch}-linux-gnu/libcudnn.so.9 /usr/lib/{libs_arch}-linux-gnu/libcudnn.so.9
# patchelf is needed to add deps of libcublasLt.so.12 to libtorch_cuda.so
RUN apt-get update \\
&& apt-get install -y --no-install-recommends openmpi-bin
RUN pip3 install patchelf==0.17.2
ENV LD_LIBRARY_PATH /usr/local/cuda/targets/{cuda_arch}-linux/lib:/usr/local/cuda/lib64/stubs:${{LD_LIBRARY_PATH}}
""".format(
        **names
    )

    # The pytorch build also depends on NCCL, which is likewise missing from
    # the ubuntu base image and is copied from the Triton min container.
    nccl_section = """
COPY --from=min_container /usr/lib/{libs_arch}-linux-gnu/libnccl.so.2 /usr/lib/{libs_arch}-linux-gnu/libnccl.so.2
""".format(
        libs_arch=names["libs_arch"]
    )

    return cuda_section + nccl_section
def change_default_python_version_rhel(version):
    """Return Dockerfile text that installs Python ``version`` through pyenv.

    The fragment installs pyenv under /opt/pyenv_build, runs
    ``pyenv install`` for the requested version, copies its libpython3*
    files to /usr/lib64 and sets PYVER, PYTHONPATH, PYBIN, PYTHON_BIN_PATH
    and PATH to refer to that installation.

    Args:
        version: Python version string substituted into the fragment.

    Returns:
        The Dockerfile fragment as a string.
    """
    template = """
# The python library version available for install via 'yum install python3.X-devel' does not
# match the version of python inside the RHEL base container. This means that python packages
# installed within the container will not be picked up by the python backend stub process pybind
# bindings. It must instead must be installed via pyenv.
ENV PYENV_ROOT=/opt/pyenv_build
RUN curl https://pyenv.run | bash
ENV PATH="${{PYENV_ROOT}}/bin:$PATH"
RUN eval "$(pyenv init -)"
RUN CONFIGURE_OPTS=\"--with-openssl=/usr/lib64\" && pyenv install {version} \\
&& cp ${{PYENV_ROOT}}/versions/{version}/lib/libpython3* /usr/lib64/
# RHEL image has several python versions. It's important
# to set the correct version, otherwise, packages that are
# pip installed will not be found during testing.
ENV PYVER={version} PYTHONPATH=/opt/python/v
RUN ln -sf ${{PYENV_ROOT}}/versions/${{PYVER}}* ${{PYTHONPATH}}
ENV PYBIN=${{PYTHONPATH}}/bin
ENV PYTHON_BIN_PATH=${{PYBIN}}/python${{PYVER}} PATH=${{PYBIN}}:${{PATH}}
"""
    return template.format(version=version)
def create_dockerfile_windows(
    ddir, dockerfile_name, argmap, backends, repoagents, caches
):
    """Write the Dockerfile for the final Windows tritonserver image.

    Args:
        ddir: Directory that receives the Dockerfile.
        dockerfile_name: File name of the Dockerfile inside ``ddir``.
        argmap: Mapping that must provide ``TRITON_VERSION``,
            ``TRITON_CONTAINER_VERSION``, ``BASE_IMAGE``, ``NVIDIA_BUILD_ID``
            and ``NVIDIA_BUILD_REF``.
        backends: Not referenced in this function.
        repoagents: Not referenced in this function.
        caches: Not referenced in this function.
    """
    production_stage = """
ARG TRITON_VERSION={version}
ARG TRITON_CONTAINER_VERSION={container_version}
ARG BASE_IMAGE={base_image}
############################################################################
## Production stage: Create container with just inference server executable
############################################################################
FROM ${{BASE_IMAGE}}
ARG TRITON_VERSION
ARG TRITON_CONTAINER_VERSION
ENV TRITON_SERVER_VERSION=${{TRITON_VERSION}}
ENV NVIDIA_TRITON_SERVER_VERSION=${{TRITON_CONTAINER_VERSION}}
LABEL com.nvidia.tritonserver.version="${{TRITON_SERVER_VERSION}}"
RUN setx path "%path%;C:\\opt\\tritonserver\\bin"
""".format(
        version=argmap["TRITON_VERSION"],
        container_version=argmap["TRITON_CONTAINER_VERSION"],
        base_image=argmap["BASE_IMAGE"],
    )

    # Replace any existing install tree with the freshly built one.
    install_section = """
WORKDIR /opt
RUN rmdir /S/Q tritonserver || exit 0
COPY --chown=1000:1000 build/install tritonserver
WORKDIR /opt/tritonserver
COPY --chown=1000:1000 NVIDIA_Deep_Learning_Container_License.pdf .
"""

    # The build id is recorded as both ENV and LABEL, the build ref as LABEL.
    build_labels = """
ENTRYPOINT []
ENV NVIDIA_BUILD_ID {build_id}
LABEL com.nvidia.build.id={build_id}
LABEL com.nvidia.build.ref={build_ref}
""".format(
        build_id=argmap["NVIDIA_BUILD_ID"],
        build_ref=argmap["NVIDIA_BUILD_REF"],
    )

    target_path = os.path.join(ddir, dockerfile_name)
    with open(target_path, "w") as out:
        out.write(production_stage + install_section + build_labels)
def create_build_dockerfiles(
    container_build_dir, images, backends, repoagents, caches, endpoints
):
    """Generate every Dockerfile needed for a container build.

    Three files are written into ``FLAGS.build_dir``: ``Dockerfile.buildbase``,
    ``Dockerfile`` (Windows or Linux flavour) and ``Dockerfile.cibase``.

    Args:
        container_build_dir: Not referenced in this function.
        images: Mapping of optional image overrides; the keys "base",
            "inference" and "gpu-base" are consulted.
        backends: Backend selection, tested with ``in`` and forwarded to the
            Dockerfile writers.
        repoagents: Forwarded to the final-image Dockerfile writer.
        caches: Forwarded to the final-image Dockerfile writer.
        endpoints: Forwarded to the Linux Dockerfile writer.

    Raises:
        KeyError: When targeting RHEL without a "base" entry in ``images``.
    """
    # Name of the upstream min image, used as the default in two places below.
    upstream_min_image = "nvcr.io/nvidia/tritonserver:{}-py3-min"

    if "base" in images:
        base_image = images["base"]
        if target_platform() == "rhel":
            print(
                "warning: RHEL is not an officially supported target and you will probably experience errors attempting to build this container."
            )
    elif target_platform() == "windows":
        base_image = "mcr.microsoft.com/dotnet/framework/sdk:4.8"
    elif target_platform() == "rhel":
        raise KeyError("A base image must be specified when targeting RHEL")
    elif FLAGS.enable_gpu:
        base_image = upstream_min_image.format(FLAGS.upstream_container_version)
    else:
        base_image = "ubuntu:24.04"

    inference_image = images["inference"] if "inference" in images else None

    build_sha = FLAGS.build_sha
    build_id = FLAGS.build_id
    dockerfileargmap = {
        "NVIDIA_BUILD_REF": build_sha if build_sha is not None else "",
        "NVIDIA_BUILD_ID": build_id if build_id is not None else "<unknown>",
        "TRITON_VERSION": FLAGS.version,
        "TRITON_CONTAINER_VERSION": FLAGS.container_version,
        "BASE_IMAGE": base_image,
        "INFERENCE_IMAGE": inference_image,
        "DCGM_VERSION": FLAGS.dcgm_version,
    }

    # A CPU-only image with the pytorch backend needs some cuda libraries and
    # dependencies copied from a GPU image, because the PyTorch containers in
    # use are not CPU-only.
    needs_gpu_base = (
        not FLAGS.enable_gpu
        and ("pytorch" in backends)
        and (target_platform() != "windows")
    )
    if needs_gpu_base:
        dockerfileargmap["GPU_BASE_IMAGE"] = (
            images["gpu-base"]
            if "gpu-base" in images
            else upstream_min_image.format(FLAGS.upstream_container_version)
        )

    # Build-base image.
    write_buildbase = (
        create_dockerfile_buildbase_rhel
        if target_platform() == "rhel"
        else create_dockerfile_buildbase
    )
    write_buildbase(FLAGS.build_dir, "Dockerfile.buildbase", dockerfileargmap)

    # Final tritonserver image.
    if target_platform() == "windows":
        create_dockerfile_windows(
            FLAGS.build_dir,
            "Dockerfile",
            dockerfileargmap,
            backends,
            repoagents,
            caches,
        )
    else:
        create_dockerfile_linux(
            FLAGS.build_dir,
            "Dockerfile",
            dockerfileargmap,
            backends,
            repoagents,
            caches,
            endpoints,
        )

    # CI base image.
    create_dockerfile_cibase(FLAGS.build_dir, "Dockerfile.cibase", dockerfileargmap)
def create_docker_build_script(script_name, container_install_dir, container_ci_dir):
    """Generate the script that drives the Docker-based Triton build.

    The script is written to ``os.path.join(FLAGS.build_dir, script_name)`` and
    performs these steps in order: build the ``tritonserver_buildbase`` image,
    run the build inside a container named ``tritonserver_builder``, copy the
    install and CI artifacts out of that container with ``docker cp``, then
    build the final ``tritonserver`` image and the ``tritonserver_cibase``
    image.

    Args:
        script_name: File name of the generated script inside FLAGS.build_dir.
        container_install_dir: Not referenced in this function; artifacts are
            copied from the fixed container path /tmp/tritonbuild/install.
        container_ci_dir: Not referenced in this function; artifacts are
            copied from the fixed container path /tmp/tritonbuild/ci.
    """
    with BuildScript(
        os.path.join(FLAGS.build_dir, script_name),
        verbose=FLAGS.verbose,
        desc=("Docker-based build script for Triton Inference Server"),
    ) as docker_script:
        #
        # Build base image... tritonserver_buildbase
        #
        docker_script.commentln(8)
        docker_script.comment("Create Triton base build image")
        docker_script.comment(
            "This image contains all dependencies necessary to build Triton"
        )
        docker_script.comment()

        cachefrommap = [
            "tritonserver_buildbase",
            "tritonserver_buildbase_cache0",
            "tritonserver_buildbase_cache1",
        ]

        baseargs = [
            "docker",
            "build",
            "-t",
            "tritonserver_buildbase",
            "-f",
            os.path.join(FLAGS.build_dir, "Dockerfile.buildbase"),
        ]

        if not FLAGS.no_container_pull:
            baseargs += [
                "--pull",
            ]

        # Windows docker runs in a VM and memory needs to be specified
        # explicitly (at least for some configurations of docker).
        if target_platform() == "windows":
            if FLAGS.container_memory:
                baseargs += ["--memory", FLAGS.container_memory]

        if target_platform() != "windows":
            baseargs += ["--cache-from={}".format(k) for k in cachefrommap]

        baseargs += ["."]

        docker_script.cwd(THIS_SCRIPT_DIR)
        docker_script.cmd(baseargs, check_exitcode=True)

        #
        # Build...
        #
        docker_script.blankln()
        docker_script.commentln(8)
        docker_script.comment("Run build in tritonserver_buildbase container")
        docker_script.comment("Mount a directory into the container where the install")
        docker_script.comment("artifacts will be placed.")
        docker_script.comment()

        # Don't use '-v' to communicate the built artifacts out of the
        # build, because we want this code to work even if run within
        # Docker (i.e. docker-in-docker) and not just if run directly
        # from host.
        runargs = [
            "docker",
            "run",
            "-w",
            "/workspace/build",
            "--name",
            "tritonserver_builder",
        ]

        if not FLAGS.no_container_interactive:
            runargs += ["-it"]

        if target_platform() == "windows":
            if FLAGS.container_memory:
                runargs += ["--memory", FLAGS.container_memory]
            runargs += ["-v", "\\\\.\\pipe\\docker_engine:\\\\.\\pipe\\docker_engine"]
        else:
            runargs += ["-v", "/var/run/docker.sock:/var/run/docker.sock"]
            if FLAGS.use_user_docker_config:
                # Expand '~' BEFORE testing for existence. Checking the raw
                # value first made a path such as '~/.docker/config.json'
                # fail the check, so the mount was silently dropped.
                docker_config = os.path.expanduser(FLAGS.use_user_docker_config)
                if os.path.exists(docker_config):
                    runargs += [
                        "-v",
                        docker_config + ":/root/.docker/config.json",
                    ]

        runargs += ["tritonserver_buildbase"]

        if target_platform() == "windows":
            runargs += ["powershell.exe", "-noexit", "-File", "./cmake_build.ps1"]
        else:
            runargs += ["./cmake_build"]

        # Remove existing tritonserver_builder container...
        if target_platform() == "windows":
            docker_script.cmd(["docker", "rm", "tritonserver_builder"])
        else:
            # Written as a raw line so the removal only runs when a container
            # of that name is listed by 'docker ps -a'.
            docker_script._file.write(
                'if [ "$(docker ps -a | grep tritonserver_builder)" ]; then docker rm -f tritonserver_builder; fi\n'
            )

        docker_script.cmd(runargs, check_exitcode=True)

        # Copy the build results out of the (stopped) builder container.
        docker_script.cmd(
            [
                "docker",
                "cp",
                "tritonserver_builder:/tmp/tritonbuild/install",
                FLAGS.build_dir,
            ],
            check_exitcode=True,
        )
        docker_script.cmd(
            [
                "docker",
                "cp",
                "tritonserver_builder:/tmp/tritonbuild/ci",
                FLAGS.build_dir,
            ],
            check_exitcode=True,
        )

        #
        # Final image... tritonserver
        #
        docker_script.blankln()
        docker_script.commentln(8)
        docker_script.comment("Create final tritonserver image")
        docker_script.comment()

        finalargs = [
            "docker",
            "build",
        ]
        # NOTE(review): 'secrets', 'requirements' and 'build_public_vllm' are
        # neither parameters nor locals of this function; presumably they are
        # module-level names assigned before this function runs - confirm.
        if secrets:
            finalargs += [
                f"--secret id=req,src={requirements}",
                "--secret id=VLLM_INDEX_URL",
                "--secret id=PYTORCH_TRITON_URL",
                "--secret id=NVPL_SLIM_URL",
                f"--build-arg BUILD_PUBLIC_VLLM={build_public_vllm}",
            ]
        finalargs += [
            "-t",
            "tritonserver",
            "-f",
            os.path.join(FLAGS.build_dir, "Dockerfile"),
            ".",
        ]

        docker_script.cwd(THIS_SCRIPT_DIR)
        docker_script.cmd(finalargs, check_exitcode=True)

        #
        # CI base image... tritonserver_cibase
        #
        docker_script.blankln()
        docker_script.commentln(8)
        docker_script.comment("Create CI base image")
        docker_script.comment()

        cibaseargs = [
            "docker",
            "build",
            "-t",
            "tritonserver_cibase",
            "-f",
            os.path.join(FLAGS.build_dir, "Dockerfile.cibase"),
            ".",
        ]

        docker_script.cwd(THIS_SCRIPT_DIR)
        docker_script.cmd(cibaseargs, check_exitcode=True)
def core_build(
cmake_script, repo_dir, cmake_dir, build_dir, install_dir, components, backends
):
# Emit, into cmake_script, the steps that configure, build and install the
# Triton core library and tritonserver executable, then copy the resulting
# artifacts into install_dir.
#
# Parameters:
#   cmake_script - script builder; its commentln/comment/mkdir/cwd/cmake/
#                  makeinstall/cp/cpdir/tar/blankln methods are called here.
#   repo_dir     - source tree root (python/openai, LICENSE, TRITON_VERSION and
#                  docker/README.third-party-src are copied from it).
#   cmake_dir    - forwarded to core_cmake_args.
#   build_dir    - root under which <build_dir>/tritonserver/{build,install}
#                  are used.
#   install_dir  - destination for the collected artifacts.
#   components, backends - forwarded to core_cmake_args.
repo_build_dir = os.path.join(build_dir, "tritonserver", "build")
repo_install_dir = os.path.join(build_dir, "tritonserver", "install")
cmake_script.commentln(8)
cmake_script.comment("Triton core library and tritonserver executable")
cmake_script.comment()
cmake_script.mkdir(repo_build_dir)
cmake_script.cwd(repo_build_dir)
cmake_script.cmake(
core_cmake_args(components, backends, cmake_dir, repo_install_dir)
)
cmake_script.makeinstall()
# Windows: tritonserver.exe, tritonserver.dll and tritonserver.lib all go to
# <install_dir>/bin.
if target_platform() == "windows":
cmake_script.mkdir(os.path.join(install_dir, "bin"))
cmake_script.cp(
os.path.join(repo_install_dir, "bin", "tritonserver.exe"),
os.path.join(install_dir, "bin"),
)
cmake_script.cp(
os.path.join(repo_install_dir, "bin", "tritonserver.dll"),
os.path.join(install_dir, "bin"),
)
cmake_script.cp(
os.path.join(repo_install_dir, "lib", "tritonserver.lib"),
os.path.join(install_dir, "bin"),
)
# RHEL: the shared library is read from and written to "lib64".
elif target_platform() == "rhel":
cmake_script.mkdir(os.path.join(install_dir, "bin"))
cmake_script.cp(
os.path.join(repo_install_dir, "bin", "tritonserver"),
os.path.join(install_dir, "bin"),
)
cmake_script.mkdir(os.path.join(install_dir, "lib64"))
cmake_script.cp(
os.path.join(repo_install_dir, "lib64", "libtritonserver.so"),
os.path.join(install_dir, "lib64"),
)
# Other platforms: the shared library is read from and written to "lib".
else:
cmake_script.mkdir(os.path.join(install_dir, "bin"))
cmake_script.cp(
os.path.join(repo_install_dir, "bin", "tritonserver"),
os.path.join(install_dir, "bin"),
)
cmake_script.mkdir(os.path.join(install_dir, "lib"))
cmake_script.cp(
os.path.join(repo_install_dir, "lib", "libtritonserver.so"),
os.path.join(install_dir, "lib"),
)
# [FIXME] Placing the tritonserver and tritonfrontend wheel files in 'python' for now,
# should be uploaded to pip registry to be able to install directly
cmake_script.mkdir(os.path.join(install_dir, "python"))
# The "triton*.whl" glob is passed through to the generated script unexpanded.
cmake_script.cp(
os.path.join(repo_install_dir, "python", "triton*.whl"),
os.path.join(install_dir, "python"),
)
# Public core headers.
cmake_script.mkdir(os.path.join(install_dir, "include", "triton"))
cmake_script.cpdir(
os.path.join(repo_install_dir, "include", "triton", "core"),
os.path.join(install_dir, "include", "triton", "core"),
)
# OpenAI frontend sources, copied from the source tree (not the build output).
cmake_script.cpdir(
os.path.join(repo_dir, "python", "openai"), os.path.join(install_dir, "python")
)
cmake_script.cp(os.path.join(repo_dir, "LICENSE"), install_dir)
cmake_script.cp(os.path.join(repo_dir, "TRITON_VERSION"), install_dir)
# If requested, package the source code for all OSS used to build
# For windows, Triton is not delivered as a container so skip for
# windows platform.
if target_platform() != "windows":
if (
(not FLAGS.no_container_build)
and (not FLAGS.no_core_build)
and (not FLAGS.no_container_source)
):
cmake_script.mkdir(os.path.join(install_dir, "third-party-src"))
# tar is run from the core build directory, where "third-party-src" lives.
cmake_script.cwd(repo_build_dir)
cmake_script.tar(
"third-party-src",
os.path.join(install_dir, "third-party-src", "src.tar.gz"),
)
cmake_script.cp(
os.path.join(repo_dir, "docker", "README.third-party-src"),
os.path.join(install_dir, "third-party-src", "README"),
)
cmake_script.comment()
cmake_script.comment("end Triton core library and tritonserver executable")
cmake_script.commentln(8)
cmake_script.blankln()
def tensorrtllm_prebuild(cmake_script):
    """Emit the environment exports needed before the TensorRT-LLM build.

    Args:
        cmake_script: Script builder; ``cmd`` is called once per export line.
    """
    export_lines = (
        # Location of the TensorRT installation.
        "export TRT_ROOT=/usr/local/tensorrt",
        "export ARCH=$(uname -m)",
        # Put the CUDA compat libraries first on the library search path.
        'export LD_LIBRARY_PATH="/usr/local/cuda/compat/lib.real:${LD_LIBRARY_PATH}"',
    )
    for line in export_lines:
        cmake_script.cmd(line)
def tensorrtllm_postbuild(cmake_script, repo_install_dir, tensorrtllm_be_dir):
    """Copy the TensorRT-LLM backend build outputs into its install tree.

    Args:
        cmake_script: Script builder; ``mkdir`` and ``cp`` are called on it.
        repo_install_dir: Install root; files land in
            ``<repo_install_dir>/backends/tensorrtllm``.
        tensorrtllm_be_dir: Backend checkout whose ``build`` directory holds
            the artifacts.
    """
    # TODO: Update the CMakeLists.txt of TRT-LLM backend to install the
    # artifacts to the correct location
    destination = os.path.join(repo_install_dir, "backends/tensorrtllm")
    cmake_script.mkdir(destination)

    # Backend shared libraries first, then the executor worker binary.
    for artifact in ("libtriton_tensorrtllm*.so", "trtllmExecutorWorker"):
        cmake_script.cp(
            os.path.join(tensorrtllm_be_dir, "build", artifact),
            destination,
        )
def backend_build(
    be,
    cmake_script,
    tag,
    build_dir,
    install_dir,
    github_organization,
    images,
    components,
    library_paths,
):
    """Emit the steps that clone, build and install one backend.

    Args:
        be: Backend name; also the checkout directory name under ``build_dir``.
        cmake_script: Script builder receiving the generated steps.
        tag: Git ref passed to ``gitclone``.
        build_dir: Root directory for backend checkouts and builds.
        install_dir: Destination root; the backend lands in
            ``<install_dir>/backends/<be>``.
        github_organization: Organization passed to ``gitclone``.
        images, components, library_paths: Forwarded to backend_cmake_args.
    """
    is_trtllm = be == "tensorrtllm"
    repo_build_dir = os.path.join(build_dir, be, "build")
    repo_install_dir = os.path.join(build_dir, be, "install")

    cmake_script.commentln(8)
    cmake_script.comment(f"'{be}' backend")
    cmake_script.comment("Delete this section to remove backend from build")
    cmake_script.comment()
    cmake_script.mkdir(build_dir)
    cmake_script.cwd(build_dir)

    # TensorRT-LLM is cloned from a fixed repository name; every other
    # backend resolves its repository through backend_repo().
    repository_name = "TensorRT-LLM" if is_trtllm else backend_repo(be)
    cmake_script.gitclone(repository_name, tag, be, github_organization)
    if is_trtllm:
        tensorrtllm_prebuild(cmake_script)

    cmake_script.mkdir(repo_build_dir)
    cmake_script.cwd(repo_build_dir)
    cmake_script.cmake(
        backend_cmake_args(images, components, be, repo_install_dir, library_paths)
    )
    cmake_script.makeinstall()

    if is_trtllm:
        tensorrtllm_postbuild(
            cmake_script, repo_install_dir, os.path.join(build_dir, be)
        )

    # Replace any previously installed copy of this backend.
    cmake_script.mkdir(os.path.join(install_dir, "backends"))
    cmake_script.rmdir(os.path.join(install_dir, "backends", be))

    # The python library available through 'yum install python3.X-devel' does
    # not match the python version inside the RHEL base container, so packages
    # installed in the container would not be picked up by the python backend
    # stub's pybind bindings. Python is installed via pyenv instead, and the
    # matching libpython files are packaged with the backend for usability.
    if target_platform() == "rhel" and be == "python":
        version_parts = (FLAGS.rhel_py_version).split(".")
        major_minor_version = ".".join(version_parts[:2])
        cmake_script.cp(
            "/usr/lib64/libpython" + major_minor_version + "*",
            os.path.join(repo_install_dir, "backends", be),
        )

    cmake_script.cpdir(
        os.path.join(repo_install_dir, "backends", be),
        os.path.join(install_dir, "backends"),
    )

    cmake_script.comment()
    cmake_script.comment(f"end '{be}' backend")
    cmake_script.commentln(8)
    cmake_script.blankln()
def backend_clone(
    be,
    clone_script,
    tag,
    build_dir,
    install_dir,
    github_organization,
):
    """Emit the steps that install a backend by cloning it, without building.

    The backend repository is cloned under ``build_dir`` and its
    ``src/model.py`` file and ``src/utils`` directory are copied into
    ``<install_dir>/backends/<be>``.

    Args:
        be: Backend name; also the checkout directory name under ``build_dir``.
        clone_script: Script builder receiving the generated steps.
        tag: Git ref passed to ``gitclone``.
        build_dir: Directory in which the repository is cloned.
        install_dir: Destination root for the installed backend.
        github_organization: Organization passed to ``gitclone``.
    """
    clone_script.commentln(8)
    clone_script.comment(f"'{be}' backend")
    clone_script.comment("Delete this section to remove backend from build")
    clone_script.comment()

    clone_script.mkdir(build_dir)
    clone_script.cwd(build_dir)
    clone_script.gitclone(backend_repo(be), tag, be, github_organization)

    # Recreate an empty <install_dir>/backends/<be> directory.
    backends_root = os.path.join(install_dir, "backends")
    clone_script.mkdir(backends_root)
    target_dir = os.path.join(backends_root, be)
    clone_script.rmdir(target_dir)
    clone_script.mkdir(target_dir)

    clone_script.cp(os.path.join(build_dir, be, "src", "model.py"), target_dir)
    clone_script.cpdir(os.path.join(build_dir, be, "src", "utils"), target_dir)

    clone_script.comment()
    clone_script.comment(f"end '{be}' backend")
    clone_script.commentln(8)
    clone_script.blankln()
def repo_agent_build(
    ra, cmake_script, build_dir, install_dir, repoagent_repo, repoagents
):
    """Emit the steps that clone, build and install one repository agent.

    Args:
        ra: Repository agent name; also the checkout directory name.
        cmake_script: Script builder receiving the generated steps.
        build_dir: Root directory for the checkout and build.
        install_dir: Destination root; the agent lands in
            ``<install_dir>/repoagents/<ra>``.
        repoagent_repo: Callable mapping ``ra`` to its repository name.
        repoagents: Mapping from agent name to the git ref to clone.
    """
    agent_build_dir = os.path.join(build_dir, ra, "build")
    agent_install_dir = os.path.join(build_dir, ra, "install")
    agents_root = os.path.join(install_dir, "repoagents")

    cmake_script.commentln(8)
    cmake_script.comment(f"'{ra}' repository agent")
    cmake_script.comment("Delete this section to remove repository agent from build")
    cmake_script.comment()

    cmake_script.mkdir(build_dir)
    cmake_script.cwd(build_dir)
    cmake_script.gitclone(
        repoagent_repo(ra), repoagents[ra], ra, FLAGS.github_organization
    )

    cmake_script.mkdir(agent_build_dir)
    cmake_script.cwd(agent_build_dir)
    # NOTE(review): 'images' and 'components' are not parameters of this
    # function; presumably module-level names - confirm.
    cmake_script.cmake(repoagent_cmake_args(images, components, ra, agent_install_dir))
    cmake_script.makeinstall()

    # Replace any previously installed copy of this agent.
    cmake_script.mkdir(agents_root)
    cmake_script.rmdir(os.path.join(agents_root, ra))
    cmake_script.cpdir(
        os.path.join(agent_install_dir, "repoagents", ra),
        agents_root,
    )

    cmake_script.comment()
    cmake_script.comment(f"end '{ra}' repository agent")
    cmake_script.commentln(8)
    cmake_script.blankln()
def cache_build(cache, cmake_script, build_dir, install_dir, cache_repo, caches):
    """Emit the steps that clone, build and install one cache implementation.

    Args:
        cache: Cache name; also the checkout directory name.
        cmake_script: Script builder receiving the generated steps.
        build_dir: Root directory for the checkout and build.
        install_dir: Destination root; the cache lands in
            ``<install_dir>/caches/<cache>``.
        cache_repo: Callable mapping ``cache`` to its repository name.
        caches: Mapping from cache name to the git ref to clone.
    """
    cache_build_dir = os.path.join(build_dir, cache, "build")
    cache_install_dir = os.path.join(build_dir, cache, "install")
    caches_root = os.path.join(install_dir, "caches")

    cmake_script.commentln(8)
    cmake_script.comment(f"'{cache}' cache")
    cmake_script.comment("Delete this section to remove cache from build")
    cmake_script.comment()

    cmake_script.mkdir(build_dir)
    cmake_script.cwd(build_dir)
    cmake_script.gitclone(
        cache_repo(cache), caches[cache], cache, FLAGS.github_organization
    )

    cmake_script.mkdir(cache_build_dir)
    cmake_script.cwd(cache_build_dir)
    # NOTE(review): 'images' and 'components' are not parameters of this
    # function; presumably module-level names - confirm.
    cmake_script.cmake(cache_cmake_args(images, components, cache, cache_install_dir))
    cmake_script.makeinstall()

    # Replace any previously installed copy of this cache.
    cmake_script.mkdir(caches_root)
    cmake_script.rmdir(os.path.join(caches_root, cache))
    cmake_script.cpdir(
        os.path.join(cache_install_dir, "caches", cache),
        caches_root,
    )

    cmake_script.comment()
    cmake_script.comment(f"end '{cache}' cache")
    cmake_script.commentln(8)
    cmake_script.blankln()
def cibase_build(
cmake_script, repo_dir, cmake_dir, build_dir, install_dir, ci_dir, backends
):
# Emit the "Collect Triton CI artifacts" section through cmake_script.
#
# Parameters as used in this body:
#   cmake_script - object whose comment/mkdir/cp/cpdir/rmdir/cmd methods
#                  are called to issue each step.
#   repo_dir     - root from which "qa", "deploy", "docs/examples" and
#                  "src/test/models" are copied.
#   cmake_dir    - accepted but not referenced in this body.
#   build_dir    - root of the per-component trees (<build_dir>/<name>/...).
#   install_dir  - accepted but not referenced in this body.
#   ci_dir       - destination directory for the collected artifacts.
#   backends     - container tested with `in` for backend names.
#
# NOTE(review): leading indentation is missing in this copy of the file,
# so the exact nesting of the conditionals below should be confirmed
# against the original source.
repo_install_dir = os.path.join(build_dir, "tritonserver", "install")
cmake_script.commentln(8)
cmake_script.comment("Collect Triton CI artifacts")
cmake_script.comment()
# The CI directory is created on every platform, before the Windows check.
cmake_script.mkdir(ci_dir)
# On windows we are not yet using a CI/QA docker image for
# testing, so don't do anything...
if target_platform() == "windows":
return
# The core build produces some artifacts that are needed for CI
# testing, so include those in the install.
cmake_script.cpdir(os.path.join(repo_dir, "qa"), ci_dir)
cmake_script.cpdir(os.path.join(repo_dir, "deploy"), ci_dir)
cmake_script.mkdir(os.path.join(ci_dir, "docs"))
cmake_script.cpdir(
os.path.join(repo_dir, "docs", "examples"), os.path.join(ci_dir, "docs")
)
cmake_script.mkdir(os.path.join(ci_dir, "src", "test"))
cmake_script.cpdir(
os.path.join(repo_dir, "src", "test", "models"),
os.path.join(ci_dir, "src", "test"),
)
# Skip copying the artifacts in the bin, lib, and python as those directories will
# be missing when the core build is not enabled.
if not FLAGS.no_core_build:
cmake_script.cpdir(os.path.join(repo_install_dir, "bin"), ci_dir)
cmake_script.mkdir(os.path.join(ci_dir, "lib"))
cmake_script.cp(
os.path.join(repo_install_dir, "lib", "libtritonrepoagent_relocation.so"),
os.path.join(ci_dir, "lib"),
)
cmake_script.cpdir(os.path.join(repo_install_dir, "python"), ci_dir)
# Some of the backends are needed for CI testing
cmake_script.mkdir(os.path.join(ci_dir, "backends"))
for be in ("identity", "repeat", "square"):
be_install_dir = os.path.join(build_dir, be, "install", "backends", be)
# The copy is wrapped in an existence test written into the script, so a
# backend that was not built is skipped when the script runs.
# NOTE(review): the function appears to have already returned for
# "windows" above, which would make this Windows branch unreachable.
if target_platform() == "windows":
cmake_script.cmd(f"if (Test-Path -Path {be_install_dir}) {{")
else:
cmake_script.cmd(f"if [[ -e {be_install_dir} ]]; then")
cmake_script.cpdir(be_install_dir, os.path.join(ci_dir, "backends"))
# Close the conditional opened above ("}" for the Windows form, "fi" otherwise).
cmake_script.cmd("}" if target_platform() == "windows" else "fi")
# Some of the unit-test built backends are needed for CI testing
cmake_script.mkdir(os.path.join(ci_dir, "tritonbuild", "tritonserver", "backends"))
for be in (
"query",
"implicit_state",
"sequence",
"dyna_sequence",
"distributed_addsub",
"iterative_sequence",
):
# These are looked up under the tritonserver install tree rather than
# under a per-backend build tree.
be_install_dir = os.path.join(repo_install_dir, "backends", be)
# Same existence-guarded copy pattern as the loop above.
if target_platform() == "windows":
cmake_script.cmd(f"if (Test-Path -Path {be_install_dir}) {{")
else:
cmake_script.cmd(f"if [[ -e {be_install_dir} ]]; then")
cmake_script.cpdir(
be_install_dir,
os.path.join(ci_dir, "tritonbuild", "tritonserver", "backends"),
)
cmake_script.cmd("}" if target_platform() == "windows" else "fi")
# The onnxruntime_backend build produces some artifacts that
# are needed for CI testing.
if "onnxruntime" in backends:
ort_install_dir = os.path.join(build_dir, "onnxruntime", "install")
cmake_script.mkdir(os.path.join(ci_dir, "qa", "L0_custom_ops"))
# NOTE(review): presumably both cp calls below sit under this "igpu"
# guard; the nesting cannot be confirmed from this copy.
if target_platform() != "igpu":
cmake_script.cp(
os.path.join(ort_install_dir, "test", "libcustom_op_library.so"),
os.path.join(ci_dir, "qa", "L0_custom_ops"),
)
cmake_script.cp(
os.path.join(ort_install_dir, "test", "custom_op_test.onnx"),
os.path.join(ci_dir, "qa", "L0_custom_ops"),
)
# [WIP] other way than wildcard?
# The source path ends in a literal "*" that is passed to cpdir as-is.
backend_tests = os.path.join(build_dir, "onnxruntime", "test", "*")
cmake_script.cpdir(backend_tests, os.path.join(ci_dir, "qa"))
# Need the build area for some backends so that they can be
# rebuilt with specific options.
cmake_script.mkdir(os.path.join(ci_dir, "tritonbuild"))
for be in ("identity", "python"):
if be in backends:
# Remove the "build" and "install" subdirectories first, then copy
# what remains of <build_dir>/<be> into <ci_dir>/tritonbuild.
cmake_script.rmdir(os.path.join(build_dir, be, "build"))
cmake_script.rmdir(os.path.join(build_dir, be, "install"))
cmake_script.cpdir(
os.path.join(build_dir, be), os.path.join(ci_dir, "tritonbuild")
)
# Section footer.
cmake_script.comment()
cmake_script.comment("end Triton CI artifacts")
cmake_script.commentln(8)
cmake_script.blankln()
def finalize_build(cmake_script, install_dir, ci_dir):
    """Add recursive ``chmod a+rw`` commands for the output directories.

    Args:
        cmake_script: Object whose ``cmd`` method receives each command
            string.
        install_dir: First directory to chmod.
        ci_dir: Second directory to chmod.
    """
    # Install directory first, then the CI directory.
    for target_dir in (install_dir, ci_dir):
        cmake_script.cmd("chmod -R a+rw {}".format(target_dir))
def enable_all():
    """Update FLAGS so that every standard feature/component is enabled.

    Picks a platform-specific set of feature switches, backends, repo
    agents, caches, filesystems and endpoints based on ``target_platform()``.
    The feature switches are set to True on ``FLAGS``; each component not
    already present in the matching ``FLAGS`` list is appended to it.
    Backend, repoagent and cache entries are compared by the text before
    the first ":" so that an entry such as ``name:tag`` counts as ``name``.
    """
    if target_platform() == "windows":
        all_backends = [
            "ensemble",
            "identity",
            "square",
            "repeat",
            "onnxruntime",
            "openvino",
            "tensorrt",
        ]
        all_repoagents = ["checksum"]
        all_caches = ["local", "redis"]
        all_filesystems = []
        all_endpoints = ["http", "grpc"]
        feature_switches = (
            "enable_logging",
            "enable_stats",
            "enable_tracing",
            "enable_gpu",
        )
    else:
        all_backends = [
            "ensemble",
            "identity",
            "square",
            "repeat",
            "onnxruntime",
            "python",
            "dali",
            "pytorch",
            "openvino",
            "fil",
            "tensorrt",
        ]
        all_repoagents = ["checksum"]
        all_caches = ["local", "redis"]
        all_filesystems = ["gcs", "s3", "azure_storage"]
        all_endpoints = ["http", "grpc", "sagemaker", "vertex-ai"]
        feature_switches = (
            "enable_logging",
            "enable_stats",
            "enable_metrics",
            "enable_gpu_metrics",
            "enable_cpu_metrics",
            "enable_tracing",
            "enable_nvtx",
            "enable_gpu",
        )

    for switch in feature_switches:
        setattr(FLAGS, switch, True)

    # Components given as <name>[:<repo-tag>]: compare on the name only,
    # using the names that were requested before anything is added.
    named_backends = {entry.split(":")[0] for entry in FLAGS.backend}
    for name in all_backends:
        if name not in named_backends:
            FLAGS.backend.append(name)

    named_repoagents = {entry.split(":")[0] for entry in FLAGS.repoagent}
    for name in all_repoagents:
        if name not in named_repoagents:
            FLAGS.repoagent.append(name)

    named_caches = {entry.split(":")[0] for entry in FLAGS.cache}
    for name in all_caches:
        if name not in named_caches:
            FLAGS.cache.append(name)

    # Filesystems and endpoints carry no tag, so compare whole entries.
    for name in all_filesystems:
        if name not in FLAGS.filesystem:
            FLAGS.filesystem.append(name)

    for name in all_endpoints:
        if name not in FLAGS.endpoint:
            FLAGS.endpoint.append(name)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
group_qv = parser.add_mutually_exclusive_group()
group_qv.add_argument(
"-q",
"--quiet",
action="store_true",
required=False,
help="Disable console output.",
)
group_qv.add_argument(
"-v",
"--verbose",
action="store_true",
required=False,
help="Enable verbose output.",
)
parser.add_argument(
"--dryrun",
action="store_true",
required=False,
help="Output the build scripts, but do not perform build.",
)
parser.add_argument(
"--no-container-build",
action="store_true",
required=False,
help="Do not use Docker container for build.",
)
parser.add_argument(
"--use-user-docker-config",
default=None,
required=False,
help="Path to the Docker configuration file to be used when performing container build.",
)
parser.add_argument(
"--no-container-interactive",
action="store_true",
required=False,
help='Do not use -it argument to "docker run" when performing container build.',
)
parser.add_argument(
"--no-container-pull",
action="store_true",
required=False,
help="Do not use Docker --pull argument when building container.",
)
parser.add_argument(
"--container-memory",
default=None,
required=False,
help="Value for Docker --memory argument. Used only for windows builds.",
)
parser.add_argument(
"--target-platform",
required=False,
default=None,
help='Target platform for build, can be "linux", "rhel", "windows" or "igpu". If not specified, build targets the current platform.',
)
parser.add_argument(
"--target-machine",
required=False,
default=None,
help="Target machine/architecture for build. If not specified, build targets the current machine/architecture.",
)
parser.add_argument(
"--build-id",
type=str,
required=False,
help="Build ID associated with the build.",
)
parser.add_argument(
"--build-sha", type=str, required=False, help="SHA associated with the build."
)
parser.add_argument(
"--build-dir",
type=str,
required=False,
help="Build directory. All repo clones and builds will be performed in this directory.",
)
parser.add_argument(
"--install-dir",
type=str,
required=False,
default=None,
help="Install directory, default is <builddir>/opt/tritonserver.",
)
parser.add_argument(
"--cmake-dir",
type=str,
required=False,
help="Directory containing the CMakeLists.txt file for Triton server.",
)
parser.add_argument(
"--tmp-dir",
type=str,
required=False,
default="/tmp",
help="Temporary directory used for building inside docker. Default is /tmp.",
)
parser.add_argument(
"--library-paths",
action="append",
required=False,
default=None,
help="Specify library paths for respective backends in build as <backend-name>[:<library_path>].",
)
parser.add_argument(
"--build-type",
required=False,
default="Release",
help='Build type, one of "Release", "Debug", "RelWithDebInfo" or "MinSizeRel". Default is "Release".',
)
parser.add_argument(
"-j",
"--build-parallel",
type=int,
required=False,
default=None,
help="Build parallelism. Defaults to 2 * number-of-cores.",
)
parser.add_argument(
"--github-organization",
type=str,
required=False,
default="https://github.com/triton-inference-server",
help='The GitHub organization containing the repos used for the build. Defaults to "https://github.com/triton-inference-server".',
)
parser.add_argument(
"--version",
type=str,
required=False,
help="The Triton version. If not specified defaults to the value in the TRITON_VERSION file.",
)
parser.add_argument(
"--container-version",
type=str,
required=False,
help="The Triton container version to build. If not specified the container version will be chosen automatically based on --version value.",
)
# Optional command run inside the build container ahead of the build.
# Help text corrected: "before the build it performed" -> "is performed".
parser.add_argument(
    "--container-prebuild-command",
    type=str,
    required=False,
    help="When performing a container build, this command will be executed within the container just before the build is performed.",
)
parser.add_argument(
"--no-container-source",
action="store_true",
required=False,
help="Do not include OSS source code in Docker container.",
)
parser.add_argument(
"--image",
action="append",
required=False,
help='Use specified Docker image in build as <image-name>,<full-image-name>. <image-name> can be "base", "gpu-base", or "pytorch".',
)
parser.add_argument(
"--enable-all",
action="store_true",
required=False,
help="Enable all standard released Triton features, backends, repository agents, caches, endpoints and file systems.",
)
parser.add_argument(
"--enable-logging", action="store_true", required=False, help="Enable logging."
)
parser.add_argument(
"--enable-stats",
action="store_true",
required=False,
help="Enable statistics collection.",
)
parser.add_argument(
"--enable-metrics",
action="store_true",
required=False,
help="Enable metrics reporting.",
)
parser.add_argument(
"--enable-gpu-metrics",
action="store_true",
required=False,
help="Include GPU metrics in reported metrics.",
)
parser.add_argument(
"--enable-cpu-metrics",
action="store_true",
required=False,
help="Include CPU metrics in reported metrics.",
)
parser.add_argument(
"--enable-tracing", action="store_true", required=False, help="Enable tracing."
)
parser.add_argument(
"--enable-nvtx", action="store_true", required=False, help="Enable NVTX."
)
parser.add_argument(
"--enable-gpu", action="store_true", required=False, help="Enable GPU support."
)
parser.add_argument(
"--enable-mali-gpu",
action="store_true",
required=False,
help="Enable ARM MALI GPU support.",
)
parser.add_argument(
"--min-compute-capability",
type=str,
required=False,
default="6.0",
help="Minimum CUDA compute capability supported by server.",
)
parser.add_argument(
"--endpoint",
action="append",
required=False,
help='Include specified endpoint in build. Allowed values are "grpc", "http", "vertex-ai" and "sagemaker".',
)
parser.add_argument(
"--filesystem",
action="append",
required=False,
help='Include specified filesystem in build. Allowed values are "gcs", "azure_storage" and "s3".',
)
parser.add_argument(
"--no-core-build",
action="store_true",
required=False,
help="Do not build Triton core shared library or executable.",
)
parser.add_argument(
"--backend",
action="append",
required=False,
help='Include specified backend in build as <backend-name>[:<repo-tag>]. If <repo-tag> starts with "pull/" then it refers to a pull-request reference, otherwise <repo-tag> indicates the git tag/branch to use for the build. If the version is non-development then the default <repo-tag> is the release branch matching the container version (e.g. version YY.MM -> branch rYY.MM); otherwise the default <repo-tag> is "main" (e.g. version YY.MMdev -> branch main).',
)
parser.add_argument(
"--repo-tag",
action="append",
required=False,
help='The version of a component to use in the build as <component-name>:<repo-tag>. <component-name> can be "common", "core", "backend" or "thirdparty". <repo-tag> indicates the git tag/branch to use for the build. Currently <repo-tag> does not support pull-request reference. If the version is non-development then the default <repo-tag> is the release branch matching the container version (e.g. version YY.MM -> branch rYY.MM); otherwise the default <repo-tag> is "main" (e.g. version YY.MMdev -> branch main).',
)
parser.add_argument(
"--repoagent",
action="append",
required=False,
help='Include specified repo agent in build as <repoagent-name>[:<repo-tag>]. If <repo-tag> starts with "pull/" then it refers to a pull-request reference, otherwise <repo-tag> indicates the git tag/branch to use for the build. If the version is non-development then the default <repo-tag> is the release branch matching the container version (e.g. version YY.MM -> branch rYY.MM); otherwise the default <repo-tag> is "main" (e.g. version YY.MMdev -> branch main).',
)
parser.add_argument(
"--cache",
action="append",
required=False,
help='Include specified cache in build as <cache-name>[:<repo-tag>]. If <repo-tag> starts with "pull/" then it refers to a pull-request reference, otherwise <repo-tag> indicates the git tag/branch to use for the build. If the version is non-development then the default <repo-tag> is the release branch matching the container version (e.g. version YY.MM -> branch rYY.MM); otherwise the default <repo-tag> is "main" (e.g. version YY.MMdev -> branch main).',
)
parser.add_argument(
"--no-force-clone",
action="store_true",
default=False,
help="Do not create fresh clones of repos that have already been cloned.",
)
parser.add_argument(
"--extra-core-cmake-arg",
action="append",
required=False,
help="Extra CMake argument as <name>=<value>. The argument is passed to CMake as -D<name>=<value> and is included after all CMake arguments added by build.py for the core builds.",
)
parser.add_argument(
"--override-core-cmake-arg",
action="append",
required=False,
help="Override specified CMake argument in the build as <name>=<value>. The argument is passed to CMake as -D<name>=<value>. This flag only impacts CMake arguments that are used by build.py. To unconditionally add a CMake argument to the core build use --extra-core-cmake-arg.",
)
parser.add_argument(
"--extra-backend-cmake-arg",
action="append",
required=False,
help="Extra CMake argument for a backend build as <backend>:<name>=<value>. The argument is passed to CMake as -D<name>=<value> and is included after all CMake arguments added by build.py for the backend.",
)
parser.add_argument(
"--override-backend-cmake-arg",
action="append",
required=False,
help="Override specified backend CMake argument in the build as <backend>:<name>=<value>. The argument is passed to CMake as -D<name>=<value>. This flag only impacts CMake arguments that are used by build.py. To unconditionally add a CMake argument to the backend build use --extra-backend-cmake-arg.",
)
parser.add_argument(
"--release-version",
required=False,
default=DEFAULT_TRITON_VERSION_MAP["release_version"],
help="This flag sets the release version for Triton Inference Server to be built. Default: the latest released version.",
)
parser.add_argument(
"--triton-container-version",
required=False,
default=DEFAULT_TRITON_VERSION_MAP["triton_container_version"],
help="This flag sets the container version for Triton Inference Server to be built. Default: the latest released version.",
)
parser.add_argument(
"--upstream-container-version",
required=False,
default=DEFAULT_TRITON_VERSION_MAP["upstream_container_version"],
help="This flag sets the upstream container version for Triton Inference Server to be built. Default: the latest released version.",
)
parser.add_argument(
"--ort-version",
required=False,
default=DEFAULT_TRITON_VERSION_MAP["ort_version"],
help="This flag sets the ORT version for Triton Inference Server to be built. Default: the latest supported version.",
)
parser.add_argument(
"--ort-openvino-version",
required=False,
default=DEFAULT_TRITON_VERSION_MAP["ort_openvino_version"],
help="This flag sets the OpenVino version for Triton Inference Server to be built. Default: the latest supported version.",
)
# Version of the standalone OpenVINO backend; defaults to the entry in
# DEFAULT_TRITON_VERSION_MAP. Help text corrected: "standalon" -> "standalone".
parser.add_argument(
    "--standalone-openvino-version",
    required=False,
    default=DEFAULT_TRITON_VERSION_MAP["standalone_openvino_version"],
    help="This flag sets the standalone OpenVino version for Triton Inference Server to be built. Default: the latest supported version.",
)
parser.add_argument(
"--dcgm-version",
required=False,
default=DEFAULT_TRITON_VERSION_MAP["dcgm_version"],
help="This flag sets the DCGM version for Triton Inference Server to be built. Default: the latest supported version.",
)
parser.add_argument(
"--vllm-version",
required=False,
default=DEFAULT_TRITON_VERSION_MAP["vllm_version"],
help="This flag sets the vLLM version for Triton Inference Server to be built. Default: the latest supported version.",
)
parser.add_argument(
"--rhel-py-version",
required=False,
default=DEFAULT_TRITON_VERSION_MAP["rhel_py_version"],
help="This flag sets the Python version for RHEL platform of Triton Inference Server to be built. Default: the latest supported version.",
)
parser.add_argument(
"--build-secret",
action="append",
required=False,
nargs=2,
metavar=("key", "value"),
help="Add build secrets in the form of <key> <value>. These secrets are used during the build process for vllm. The secrets are passed to the Docker build step as `--secret id=<key>`. The following keys are expected and their purposes are described below:\n\n"
" - 'req': A file containing a list of dependencies for pip (e.g., requirements.txt).\n"
" - 'build_public_vllm': A flag (default is 'true') indicating whether to build the public VLLM version.\n\n"
"Ensure that the required environment variables for these secrets are set before running the build.",
)
parser.add_argument(
"--triton-wheels-dependencies-group",
required=False,
type=str,
default="all",
help="The group of dependencies for Triton wheels to be installed. Default value is 'all'.",
)
FLAGS = parser.parse_args()
# Options that were never given on the command line are None at this
# point; give each of them an empty list so later code can iterate
# over them unconditionally.
for _list_flag in (
    "image",
    "repo_tag",
    "backend",
    "endpoint",
    "filesystem",
    "repoagent",
    "cache",
    "library_paths",
    "extra_core_cmake_arg",
    "override_core_cmake_arg",
    "override_backend_cmake_arg",
    "extra_backend_cmake_arg",
    "build_secret",
):
    if getattr(FLAGS, _list_flag) is None:
        setattr(FLAGS, _list_flag, [])
FLAGS.boost_url = os.getenv(
"TRITON_BOOST_URL",
"https://archives.boost.io/release/1.80.0/source/boost_1_80_0.tar.gz",
)
FLAGS.boost_sha256 = (
"4b2136f98bdd1f5857f1c3dea9ac2018effe65286cf251534b6ae20cc45e1847"
)
# if --enable-all is specified, then update FLAGS to enable all
# settings, backends, repo-agents, caches, file systems, endpoints, etc.
if FLAGS.enable_all:
enable_all()
# When doing a docker build, --build-dir, --install-dir and
# --cmake-dir must not be set. We will use the build/ subdir
# within the server/ repo that contains this build.py script for
# --build-dir. If not doing a docker build, --build-dir must be
# set.
if FLAGS.no_container_build:
if FLAGS.build_dir is None:
fail("--no-container-build requires --build-dir")
if FLAGS.install_dir is None:
FLAGS.install_dir = os.path.join(FLAGS.build_dir, "opt", "tritonserver")
if FLAGS.cmake_dir is None:
FLAGS.cmake_dir = THIS_SCRIPT_DIR
else:
if FLAGS.build_dir is not None:
fail("--build-dir must not be set for container-based build")
if FLAGS.install_dir is not None:
fail("--install-dir must not be set for container-based build")
if FLAGS.cmake_dir is not None:
fail("--cmake-dir must not be set for container-based build")
FLAGS.build_dir = os.path.join(THIS_SCRIPT_DIR, "build")
# Determine the versions. Start with the Triton version: if --version
# is not explicitly specified, fall back to the "release_version" entry
# of DEFAULT_TRITON_VERSION_MAP.
if FLAGS.version is None:
FLAGS.version = DEFAULT_TRITON_VERSION_MAP["release_version"]
if FLAGS.build_parallel is None:
FLAGS.build_parallel = multiprocessing.cpu_count() * 2
log("Building Triton Inference Server")
log("platform {}".format(target_platform()))
log("machine {}".format(target_machine()))
log("version {}".format(FLAGS.version))
log("build dir {}".format(FLAGS.build_dir))
log("install dir {}".format(FLAGS.install_dir))
log("cmake dir {}".format(FLAGS.cmake_dir))
# Determine the default repo-tag that should be used for images,
# backends, repo-agents, and caches if a repo-tag is not given
# explicitly. For release branches we use the release branch as
# the default, otherwise we use 'main'.
default_repo_tag = (
"main"
if FLAGS.triton_container_version.endswith("dev")
else "r" + FLAGS.triton_container_version
)
log("default repo-tag: {}".format(default_repo_tag))
# For other versions use the TRITON_VERSION_MAP unless explicitly
# given.
FLAGS.container_version, FLAGS.upstream_container_version = container_versions(
FLAGS.version, FLAGS.container_version, FLAGS.upstream_container_version
)
log("container version {}".format(FLAGS.container_version))
log("upstream container version {}".format(FLAGS.upstream_container_version))
for ep in FLAGS.endpoint:
log(f'endpoint "{ep}"')
for fs in FLAGS.filesystem:
log(f'filesystem "{fs}"')
# Initialize map of backends to build and repo-tag for each.
backends = {}
for be in FLAGS.backend:
parts = be.split(":")
if len(parts) == 1:
parts.append(default_repo_tag)
log('backend "{}" at tag/branch "{}"'.format(parts[0], parts[1]))
backends[parts[0]] = parts[1]
if "vllm" in backends:
if "python" not in backends:
log(
"vLLM backend requires Python backend, adding Python backend with tag {}".format(
backends["vllm"]
)
)
backends["python"] = backends["vllm"]
secrets = dict(getattr(FLAGS, "build_secret", []))
if secrets:
requirements = secrets.get("req", "")
build_public_vllm = secrets.get("build_public_vllm", "true")
log('Build Arg for BUILD_PUBLIC_VLLM: "{}"'.format(build_public_vllm))
# Initialize map of repo agents to build and repo-tag for each.
repoagents = {}
for be in FLAGS.repoagent:
parts = be.split(":")
if len(parts) == 1:
parts.append(default_repo_tag)
log('repoagent "{}" at tag/branch "{}"'.format(parts[0], parts[1]))
repoagents[parts[0]] = parts[1]
# Initialize map of caches to build and repo-tag for each.
caches = {}
for be in FLAGS.cache:
parts = be.split(":")
if len(parts) == 1:
parts.append(default_repo_tag)
log('cache "{}" at tag/branch "{}"'.format(parts[0], parts[1]))
caches[parts[0]] = parts[1]
# Initialize map of docker images.
images = {}
for img in FLAGS.image:
parts = img.split(",")
fail_if(
len(parts) != 2, "--image must specify <image-name>,<full-image-registry>"
)
fail_if(
parts[0] not in ["base", "gpu-base", "pytorch", "inference"],
"unsupported value for --image",
)
log('image "{}": "{}"'.format(parts[0], parts[1]))
images[parts[0]] = parts[1]
# Initialize map of library paths for each backend.
# Each --library-paths entry is <backend-name>[:<library_path>]. Split only
# on the first ":" so that a path which itself contains ":" (for example a
# Windows drive letter) is kept intact instead of being silently dropped.
# Entries without a path part are ignored, as before.
library_paths = {}
for lpath in FLAGS.library_paths:
    parts = lpath.split(":", 1)
    if len(parts) == 2:
        log('backend "{}" library path "{}"'.format(parts[0], parts[1]))
        library_paths[parts[0]] = parts[1]
# Parse any explicitly specified cmake arguments
# Core arguments are given as <name>=<value>. Split only on the first "="
# so that a value may itself contain "=" (e.g. CMAKE_CXX_FLAGS=-DFOO=1);
# this matches how the backend-specific arguments are split below.
for cf in FLAGS.extra_core_cmake_arg:
    parts = cf.split("=", 1)
    fail_if(len(parts) != 2, "--extra-core-cmake-arg must specify <name>=<value>")
    log('CMake core extra "-D{}={}"'.format(parts[0], parts[1]))
    EXTRA_CORE_CMAKE_FLAGS[parts[0]] = parts[1]
for cf in FLAGS.override_core_cmake_arg:
    parts = cf.split("=", 1)
    fail_if(
        len(parts) != 2, "--override-core-cmake-arg must specify <name>=<value>"
    )
    log('CMake core override "-D{}={}"'.format(parts[0], parts[1]))
    OVERRIDE_CORE_CMAKE_FLAGS[parts[0]] = parts[1]
for cf in FLAGS.extra_backend_cmake_arg:
parts = cf.split(":", 1)
fail_if(
len(parts) != 2,
"--extra-backend-cmake-arg must specify <backend>:<name>=<value>",
)
be = parts[0]
parts = parts[1].split("=", 1)
fail_if(
len(parts) != 2,
"--extra-backend-cmake-arg must specify <backend>:<name>=<value>",
)
fail_if(
be not in backends,
'--extra-backend-cmake-arg specifies backend "{}" which is not included in build'.format(
be
),
)
log('backend "{}" CMake extra "-D{}={}"'.format(be, parts[0], parts[1]))
if be not in EXTRA_BACKEND_CMAKE_FLAGS:
EXTRA_BACKEND_CMAKE_FLAGS[be] = {}
EXTRA_BACKEND_CMAKE_FLAGS[be][parts[0]] = parts[1]
for
gitextract_5r79t389/
├── .clang-format
├── .dockerignore
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.md
│ │ └── feature_request.md
│ ├── PULL_REQUEST_TEMPLATE/
│ │ ├── pull_request_template_external_contrib.md
│ │ └── pull_request_template_internal_contrib.md
│ ├── pull_request_template.md
│ └── workflows/
│ ├── codeql.yml
│ └── pre-commit.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CITATION.cff
├── CMakeLists.txt
├── CONTRIBUTING.md
├── Dockerfile.QA
├── Dockerfile.sdk
├── Dockerfile.win10.min
├── LICENSE
├── README.md
├── SECURITY.md
├── TRITON_VERSION
├── build.py
├── compose.py
├── deploy/
│ ├── alibaba-cloud/
│ │ └── README.md
│ ├── aws/
│ │ ├── Chart.yaml
│ │ ├── README.md
│ │ ├── dashboard.json
│ │ ├── templates/
│ │ │ ├── _helpers.tpl
│ │ │ ├── deployment.yaml
│ │ │ ├── secrets.yaml
│ │ │ └── service.yaml
│ │ └── values.yaml
│ ├── fleetcommand/
│ │ ├── Chart.yaml
│ │ ├── README.md
│ │ ├── dashboard.json
│ │ ├── templates/
│ │ │ ├── _helpers.tpl
│ │ │ ├── configmap-grafana-dashboard.yaml
│ │ │ ├── deployment.yaml
│ │ │ ├── secrets.yaml
│ │ │ └── service.yaml
│ │ └── values.yaml
│ ├── gcp/
│ │ ├── Chart.yaml
│ │ ├── README.md
│ │ ├── dashboard.json
│ │ ├── templates/
│ │ │ ├── _helpers.tpl
│ │ │ ├── deployment.yaml
│ │ │ └── service.yaml
│ │ └── values.yaml
│ ├── gke-marketplace-app/
│ │ ├── README.md
│ │ ├── benchmark/
│ │ │ ├── README.md
│ │ │ ├── model-store/
│ │ │ │ ├── bert_base_tf_cpu/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── bert_base_tf_gpu/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── bert_base_trt_gpu/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── bert_base_trt_gpu_seqlen128/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── bert_distill_tf_cpu/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ └── bert_distill_tf_gpu/
│ │ │ │ └── config.pbtxt
│ │ │ └── perf-analyzer-script/
│ │ │ ├── perf_query.sh
│ │ │ └── triton_client.yaml
│ │ ├── client-sample/
│ │ │ ├── bert_request.json
│ │ │ ├── locustfile_bert.py
│ │ │ └── perf_analyzer_grpc.sh
│ │ ├── server-deployer/
│ │ │ ├── Dockerfile
│ │ │ ├── build_and_push.sh
│ │ │ ├── chart/
│ │ │ │ └── triton/
│ │ │ │ ├── Chart.yaml
│ │ │ │ ├── templates/
│ │ │ │ │ ├── _helpers.tpl
│ │ │ │ │ ├── application.yaml
│ │ │ │ │ ├── deployment.yaml
│ │ │ │ │ ├── hpa.yaml
│ │ │ │ │ ├── ingress.yaml
│ │ │ │ │ └── service.yaml
│ │ │ │ └── values.yaml
│ │ │ ├── data-test/
│ │ │ │ └── schema.yaml
│ │ │ └── schema.yaml
│ │ └── trt-engine/
│ │ └── README.md
│ ├── k8s-onprem/
│ │ ├── Chart.yaml
│ │ ├── README.md
│ │ ├── dashboard.json
│ │ ├── templates/
│ │ │ ├── _helpers.tpl
│ │ │ ├── deployment.yaml
│ │ │ ├── hpa.yaml
│ │ │ ├── ingressroute.yaml
│ │ │ ├── rbac.yaml
│ │ │ ├── service.yaml
│ │ │ └── serviceaccount.yaml
│ │ └── values.yaml
│ ├── mlflow-triton-plugin/
│ │ ├── README.md
│ │ ├── examples/
│ │ │ ├── expected_output.json
│ │ │ ├── input.json
│ │ │ └── onnx_float32_int32_int32/
│ │ │ ├── 1/
│ │ │ │ └── model.onnx
│ │ │ └── config.pbtxt
│ │ ├── mlflow_triton/
│ │ │ ├── __init__.py
│ │ │ ├── config.py
│ │ │ └── deployments.py
│ │ ├── scripts/
│ │ │ ├── publish_model_to_mlflow.py
│ │ │ └── triton_flavor.py
│ │ └── setup.py
│ └── oci/
│ ├── Chart.yaml
│ ├── README.md
│ ├── dashboard.json
│ ├── templates/
│ │ ├── _helpers.tpl
│ │ ├── deployment.yaml
│ │ ├── secrets.yaml
│ │ └── service.yaml
│ └── values.yaml
├── docker/
│ ├── README.third-party-src
│ ├── cpu_only/
│ │ ├── entrypoint.d/
│ │ │ ├── 12-banner.sh
│ │ │ └── 50-gpu-driver-check2.sh
│ │ └── nvidia_entrypoint.sh
│ ├── entrypoint.d/
│ │ ├── 10-banner.txt
│ │ ├── 15-container-copyright.txt
│ │ ├── 50-gpu-driver-check2.sh
│ │ ├── 56-network-driver-version-check.sh
│ │ ├── 70-shm-check.sh
│ │ └── 99-check-run-aip-mode.sh
│ └── sagemaker/
│ └── serve
├── docs/
│ ├── Dockerfile.docs
│ ├── Makefile
│ ├── README.md
│ ├── _reference/
│ │ └── tritonclient_api.rst
│ ├── _static/
│ │ ├── .gitattributes
│ │ ├── custom.css
│ │ └── rtd-data.js
│ ├── _templates/
│ │ └── layout.html
│ ├── backend_guide/
│ │ └── vllm.rst
│ ├── client_guide/
│ │ ├── api_reference.rst
│ │ ├── in_process.rst
│ │ ├── kserve.rst
│ │ ├── kserve_extension.rst
│ │ └── python.rst
│ ├── conf.py
│ ├── contents.rst
│ ├── customization_guide/
│ │ ├── build.md
│ │ ├── compose.md
│ │ ├── deploy.md
│ │ ├── inference_protocols.md
│ │ ├── inprocess_c_api.md
│ │ ├── inprocess_java_api.md
│ │ ├── repository_agents.md
│ │ ├── sagemaker.md
│ │ ├── test.md
│ │ └── tritonfrontend.md
│ ├── examples/
│ │ ├── README.md
│ │ ├── fetch_models.sh
│ │ ├── jetson/
│ │ │ ├── README.md
│ │ │ └── concurrency_and_dynamic_batching/
│ │ │ ├── Makefile
│ │ │ ├── README.md
│ │ │ ├── common.h
│ │ │ ├── labels.txt
│ │ │ ├── people_detection.cc
│ │ │ ├── tao/
│ │ │ │ ├── convert_peoplenet.sh
│ │ │ │ └── models/
│ │ │ │ └── peoplenet/
│ │ │ │ └── .gitkeep
│ │ │ ├── trtis_model_repo_sample_1/
│ │ │ │ └── peoplenet/
│ │ │ │ ├── 1/
│ │ │ │ │ └── .gitkeep
│ │ │ │ └── config.pbtxt
│ │ │ └── trtis_model_repo_sample_2/
│ │ │ └── peoplenet/
│ │ │ ├── 1/
│ │ │ │ └── .gitkeep
│ │ │ └── config.pbtxt
│ │ └── model_repository/
│ │ ├── densenet_onnx/
│ │ │ ├── config.pbtxt
│ │ │ └── densenet_labels.txt
│ │ ├── inception_onnx/
│ │ │ ├── config.pbtxt
│ │ │ └── inception_labels.txt
│ │ ├── simple/
│ │ │ ├── 1/
│ │ │ │ └── model.onnx
│ │ │ └── config.pbtxt
│ │ ├── simple_dyna_sequence/
│ │ │ ├── 1/
│ │ │ │ └── model.onnx
│ │ │ └── config.pbtxt
│ │ ├── simple_identity/
│ │ │ ├── 1/
│ │ │ │ └── model.py
│ │ │ └── config.pbtxt
│ │ ├── simple_int8/
│ │ │ ├── 1/
│ │ │ │ └── model.onnx
│ │ │ └── config.pbtxt
│ │ ├── simple_sequence/
│ │ │ ├── 1/
│ │ │ │ └── model.onnx
│ │ │ └── config.pbtxt
│ │ └── simple_string/
│ │ ├── 1/
│ │ │ └── model.onnx
│ │ └── config.pbtxt
│ ├── exclusions.txt
│ ├── generate_docs.py
│ ├── getting_started/
│ │ ├── llm.md
│ │ ├── quick_deployment.rst
│ │ ├── quickstart.md
│ │ └── trtllm_user_guide.md
│ ├── index.md
│ ├── introduction/
│ │ ├── compatibility.md
│ │ ├── index.md
│ │ └── release_notes.md
│ ├── llm_features/
│ │ └── speculative_decoding.rst
│ ├── perf_benchmark/
│ │ ├── genai_perf.rst
│ │ ├── model_analyzer.rst
│ │ └── perf_analyzer.rst
│ ├── protocol/
│ │ ├── README.md
│ │ ├── extension_binary_data.md
│ │ ├── extension_classification.md
│ │ ├── extension_generate.md
│ │ ├── extension_logging.md
│ │ ├── extension_model_configuration.md
│ │ ├── extension_model_repository.md
│ │ ├── extension_parameters.md
│ │ ├── extension_schedule_policy.md
│ │ ├── extension_sequence.md
│ │ ├── extension_shared_memory.md
│ │ ├── extension_statistics.md
│ │ └── extension_trace.md
│ ├── repositories.txt
│ ├── scaling_guide/
│ │ └── scaling_guide.rst
│ ├── server_guide/
│ │ ├── features.rst
│ │ ├── model_pipelines.rst
│ │ └── state_management.rst
│ └── user_guide/
│ ├── architecture.md
│ ├── batcher.md
│ ├── bls.md
│ ├── custom_operations.md
│ ├── debugging_guide.md
│ ├── decoupled_models.md
│ ├── ensemble_models.md
│ ├── faq.md
│ ├── implicit_state_management.md
│ ├── jetson.md
│ ├── metrics.md
│ ├── model_analyzer.md
│ ├── model_configuration.md
│ ├── model_execution.md
│ ├── model_management.md
│ ├── model_repository.md
│ ├── optimization.md
│ ├── perf_analyzer.md
│ ├── performance_tuning.md
│ ├── ragged_batching.md
│ ├── rate_limiter.md
│ ├── request_cancellation.md
│ ├── response_cache.md
│ ├── scheduler.md
│ ├── trace.md
│ └── v1_to_v2.md
├── enhancements/
│ ├── NNNN-template-complete.md
│ ├── NNNN-template-limited.md
│ ├── README.md
│ └── teps/
│ └── 0000-tep-process.md
├── pyproject.toml
├── python/
│ └── openai/
│ ├── README.md
│ ├── openai_frontend/
│ │ ├── __init__.py
│ │ ├── engine/
│ │ │ ├── __init__.py
│ │ │ ├── engine.py
│ │ │ ├── triton_engine.py
│ │ │ └── utils/
│ │ │ ├── __init__.py
│ │ │ ├── chat.py
│ │ │ ├── tokenizer.py
│ │ │ ├── tool_call_parsers/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── llama_tool_call_parser.py
│ │ │ │ ├── mistral_tool_call_parser.py
│ │ │ │ ├── tool_call_parser.py
│ │ │ │ └── utils.py
│ │ │ └── triton.py
│ │ ├── frontend/
│ │ │ ├── __init__.py
│ │ │ ├── fastapi/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── middleware/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── api_restriction.py
│ │ │ │ └── routers/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── chat.py
│ │ │ │ ├── completions.py
│ │ │ │ ├── embeddings.py
│ │ │ │ ├── models.py
│ │ │ │ └── observability.py
│ │ │ ├── fastapi_frontend.py
│ │ │ └── frontend.py
│ │ ├── main.py
│ │ ├── schemas/
│ │ │ ├── __init__.py
│ │ │ └── openai.py
│ │ └── utils/
│ │ └── utils.py
│ ├── requirements-test.txt
│ ├── requirements.txt
│ └── tests/
│ ├── __init__.py
│ ├── conftest.py
│ ├── test_chat_completions.py
│ ├── test_completions.py
│ ├── test_embeddings.py
│ ├── test_lora.py
│ ├── test_models/
│ │ ├── identity_py/
│ │ │ ├── 1/
│ │ │ │ └── model.py
│ │ │ └── config.pbtxt
│ │ └── mock_llm/
│ │ ├── 1/
│ │ │ └── model.py
│ │ └── config.pbtxt
│ ├── test_observability.py
│ ├── test_openai_client.py
│ ├── test_openai_restricted_apis.py
│ ├── test_tool_calling.py
│ ├── utils.py
│ ├── vllm_embedding_models/
│ │ └── all-MiniLM-L6-v2/
│ │ ├── 1/
│ │ │ └── model.json
│ │ └── config.pbtxt
│ ├── vllm_mistral_models/
│ │ └── mistral-nemo-instruct-2407/
│ │ ├── 1/
│ │ │ └── model.json
│ │ └── config.pbtxt
│ └── vllm_models/
│ └── llama-3.1-8b-instruct/
│ ├── 1/
│ │ └── model.json
│ └── config.pbtxt
├── qa/
│ ├── L0_additional_dependency_dirs/
│ │ └── test.sh
│ ├── L0_async_work_queue/
│ │ └── test.sh
│ ├── L0_backend_bls/
│ │ └── test.sh
│ ├── L0_backend_config/
│ │ └── test.sh
│ ├── L0_backend_fastertransformer/
│ │ └── test.sh
│ ├── L0_backend_identity/
│ │ ├── identity_test.py
│ │ └── test.sh
│ ├── L0_backend_onnxruntime/
│ │ ├── gen_add_bf16_onnx_model.py
│ │ ├── test.py
│ │ └── test.sh
│ ├── L0_backend_output_detail/
│ │ └── test.sh
│ ├── L0_backend_python/
│ │ ├── argument_validation/
│ │ │ ├── models/
│ │ │ │ └── argument_validation/
│ │ │ │ ├── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── config.pbtxt
│ │ │ └── test.sh
│ │ ├── async_execute/
│ │ │ ├── concurrency_test.py
│ │ │ └── test.sh
│ │ ├── bls/
│ │ │ ├── bls_parameters_test.py
│ │ │ └── test.sh
│ │ ├── common.sh
│ │ ├── custom_metrics/
│ │ │ └── test.sh
│ │ ├── decoupled/
│ │ │ ├── decoupled_test.py
│ │ │ ├── models/
│ │ │ │ ├── decoupled_bls/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.py
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── decoupled_bls_async_cancel/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.py
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── decoupled_bls_cancel/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.py
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── decoupled_bls_cancel_after_complete/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.py
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── decoupled_bls_stream/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.py
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── decoupled_execute_error/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.py
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── decoupled_raise_exception/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.py
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── decoupled_return_response_error/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.py
│ │ │ │ │ └── config.pbtxt
│ │ │ │ └── decoupled_send_after_close_error/
│ │ │ │ ├── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── config.pbtxt
│ │ │ └── test.sh
│ │ ├── ensemble/
│ │ │ ├── ensemble_test.py
│ │ │ └── test.sh
│ │ ├── env/
│ │ │ └── test.sh
│ │ ├── examples/
│ │ │ └── test.sh
│ │ ├── io/
│ │ │ ├── io_test.py
│ │ │ ├── requested_output_model/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── model.py
│ │ │ └── test.sh
│ │ ├── lifecycle/
│ │ │ ├── lifecycle_test.py
│ │ │ └── test.sh
│ │ ├── logging/
│ │ │ ├── logging_test.py
│ │ │ └── test.sh
│ │ ├── model_control/
│ │ │ ├── model_control_test.py
│ │ │ └── test.sh
│ │ ├── model_readiness/
│ │ │ ├── test.sh
│ │ │ ├── test_model_readiness.py
│ │ │ └── test_models/
│ │ │ ├── is_ready_fn_returns_true_decoupled/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── model.py
│ │ │ ├── readiness_coroutine_model.py
│ │ │ └── readiness_model.py
│ │ ├── parameters/
│ │ │ ├── response_parameters_test.py
│ │ │ └── test.sh
│ │ ├── python_based_backends/
│ │ │ ├── python_based_backends_test.py
│ │ │ └── test.sh
│ │ ├── python_test.py
│ │ ├── request_rescheduling/
│ │ │ ├── grpc_endpoint_test.py
│ │ │ └── test.sh
│ │ ├── response_sender/
│ │ │ ├── response_sender_complete_final_test.py
│ │ │ ├── response_sender_test.py
│ │ │ └── test.sh
│ │ ├── restart/
│ │ │ ├── models/
│ │ │ │ └── restart/
│ │ │ │ ├── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── config.pbtxt
│ │ │ ├── restart_test.py
│ │ │ └── test.sh
│ │ ├── setup_python_enviroment.sh
│ │ ├── test.sh
│ │ ├── test_infer_shm_leak.py
│ │ └── variants/
│ │ └── test.sh
│ ├── L0_backend_release/
│ │ └── test.sh
│ ├── L0_backend_tutorial/
│ │ └── test.sh
│ ├── L0_batch_custom/
│ │ ├── batch_custom_test.py
│ │ └── test.sh
│ ├── L0_batch_input/
│ │ ├── batch_input_test.py
│ │ └── test.sh
│ ├── L0_batcher/
│ │ ├── batcher_test.py
│ │ ├── queue_timeout_test.py
│ │ ├── test.sh
│ │ └── verify_timestamps.py
│ ├── L0_buffer_attributes/
│ │ ├── buffer_attributes_test.py
│ │ ├── models/
│ │ │ ├── bls/
│ │ │ │ ├── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── config.pbtxt
│ │ │ └── identity/
│ │ │ ├── 1/
│ │ │ │ └── model.py
│ │ │ └── config.pbtxt
│ │ └── test.sh
│ ├── L0_client_build_variants/
│ │ └── test.sh
│ ├── L0_client_java/
│ │ └── test.sh
│ ├── L0_client_memory_growth/
│ │ ├── client_memory_mail.py
│ │ ├── models/
│ │ │ └── custom_identity_int32/
│ │ │ └── config.pbtxt
│ │ └── test.sh
│ ├── L0_client_nobatch/
│ │ ├── client_test.py
│ │ └── test.sh
│ ├── L0_client_timeout/
│ │ ├── client_infer_timeout_test.py
│ │ ├── client_non_infer_timeout_test.py
│ │ ├── models/
│ │ │ └── custom_identity_int32/
│ │ │ └── config.pbtxt
│ │ └── test.sh
│ ├── L0_client_valgrind/
│ │ ├── models/
│ │ │ └── custom_identity_int32/
│ │ │ └── config.pbtxt
│ │ └── test.sh
│ ├── L0_cmdline_trace/
│ │ ├── test.sh
│ │ └── trace_client.py
│ ├── L0_compute_capability/
│ │ └── test.sh
│ ├── L0_config_json/
│ │ ├── ensemble_config.pbtxt
│ │ ├── max_priority_level.pbtxt
│ │ └── test.sh
│ ├── L0_cuda_graph/
│ │ ├── test.sh
│ │ └── trt_cuda_graph_test.py
│ ├── L0_cuda_shared_memory/
│ │ ├── cuda_shared_memory_test.py
│ │ └── test.sh
│ ├── L0_custom_model_config/
│ │ └── test.sh
│ ├── L0_custom_ops/
│ │ ├── mod_op_test.py
│ │ ├── onnx_op_test.py
│ │ ├── test.sh
│ │ └── vision_op_test.py
│ ├── L0_data_compression/
│ │ ├── test.sh
│ │ └── validation.py
│ ├── L0_decoupled/
│ │ ├── decoupled_test.py
│ │ ├── models/
│ │ │ ├── fan_repeat/
│ │ │ │ └── config.pbtxt
│ │ │ ├── identity_int32/
│ │ │ │ └── config.pbtxt
│ │ │ ├── nested_square/
│ │ │ │ └── config.pbtxt
│ │ │ ├── repeat_square/
│ │ │ │ └── config.pbtxt
│ │ │ ├── sequence_repeat/
│ │ │ │ └── config.pbtxt
│ │ │ └── simple_repeat/
│ │ │ └── config.pbtxt
│ │ └── test.sh
│ ├── L0_device_memory_tracker/
│ │ ├── test.py
│ │ └── test.sh
│ ├── L0_dlpack_multi_gpu/
│ │ └── test.sh
│ ├── L0_doc_links/
│ │ ├── mkdocs.yml
│ │ └── test.sh
│ ├── L0_dyna_implicit_state/
│ │ └── test.sh
│ ├── L0_dyna_sequence_batcher/
│ │ ├── dyna_sequence_batcher_test.py
│ │ └── test.sh
│ ├── L0_grpc/
│ │ ├── client_plugin_models/
│ │ │ └── client_plugin_test/
│ │ │ ├── 1/
│ │ │ │ └── model.py
│ │ │ └── config.pbtxt
│ │ ├── grpc_basic_auth_test.py
│ │ ├── grpc_client_plugin_test.py
│ │ ├── nginx.conf
│ │ ├── python_grpc_aio_test.py
│ │ ├── python_unit_test.py
│ │ └── test.sh
│ ├── L0_grpc_state_cleanup/
│ │ ├── cleanup_test.py
│ │ └── test.sh
│ ├── L0_http/
│ │ ├── generate_endpoint_test.py
│ │ ├── http_basic_auth_test.py
│ │ ├── http_client_plugin_test.py
│ │ ├── http_input_size_limit_test.py
│ │ ├── http_request_many_chunks.py
│ │ ├── http_restricted_api_test.py
│ │ ├── http_test.py
│ │ ├── nginx.conf
│ │ ├── python_http_aio_test.py
│ │ └── test.sh
│ ├── L0_http_fuzz/
│ │ ├── fuzztest.py
│ │ └── test.sh
│ ├── L0_https/
│ │ ├── nginx.conf
│ │ └── test.sh
│ ├── L0_implicit_state/
│ │ ├── implicit_state.py
│ │ ├── models/
│ │ │ ├── growable_memory/
│ │ │ │ └── config.pbtxt
│ │ │ ├── no_implicit_state/
│ │ │ │ └── config.pbtxt
│ │ │ ├── no_state_update/
│ │ │ │ └── config.pbtxt
│ │ │ ├── single_state_buffer/
│ │ │ │ └── config.pbtxt
│ │ │ └── wrong_internal_state/
│ │ │ └── config.pbtxt
│ │ └── test.sh
│ ├── L0_infer/
│ │ ├── infer_test.py
│ │ ├── install_and_test.sh
│ │ └── test.sh
│ ├── L0_infer_reshape/
│ │ ├── infer_reshape_test.py
│ │ └── test.sh
│ ├── L0_infer_variable/
│ │ ├── infer_variable_test.py
│ │ └── test.sh
│ ├── L0_infer_zero/
│ │ ├── infer_zero_test.py
│ │ └── test.sh
│ ├── L0_input_validation/
│ │ ├── input_validation_test.py
│ │ ├── models/
│ │ │ ├── input_all_optional/
│ │ │ │ ├── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── config.pbtxt
│ │ │ ├── input_all_required/
│ │ │ │ ├── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── config.pbtxt
│ │ │ └── input_optional/
│ │ │ ├── 1/
│ │ │ │ └── model.py
│ │ │ └── config.pbtxt
│ │ └── test.sh
│ ├── L0_io/
│ │ ├── gen_libtorch_model.py
│ │ └── test.sh
│ ├── L0_iterative_sequence/
│ │ ├── iterative_sequence_e2e.py
│ │ ├── models/
│ │ │ └── iterative_sequence/
│ │ │ └── config.pbtxt
│ │ └── test.sh
│ ├── L0_java_memory_growth/
│ │ ├── MemoryGrowthTest.java
│ │ └── test.sh
│ ├── L0_java_resnet/
│ │ ├── ResnetTest.java
│ │ ├── expected_output_data/
│ │ │ ├── expected_output_onnx.txt
│ │ │ ├── expected_output_pytorch.txt
│ │ │ └── expected_output_tensorflow.txt
│ │ └── test.sh
│ ├── L0_java_sequence_batcher/
│ │ ├── SequenceTest.java
│ │ └── test.sh
│ ├── L0_java_simple_example/
│ │ └── test.sh
│ ├── L0_json/
│ │ └── test.sh
│ ├── L0_large_payload/
│ │ ├── large_payload_test.py
│ │ └── test.sh
│ ├── L0_libtorch_disable_cudnn/
│ │ └── test.sh
│ ├── L0_libtorch_inference_mode/
│ │ └── test.sh
│ ├── L0_libtorch_instance_group_kind_model/
│ │ ├── client.py
│ │ ├── gen_models.py
│ │ ├── models/
│ │ │ └── libtorch_multi_device/
│ │ │ └── config.pbtxt
│ │ └── test.sh
│ ├── L0_libtorch_io_names/
│ │ ├── io_names_client.py
│ │ └── test.sh
│ ├── L0_libtorch_io_types/
│ │ └── test.sh
│ ├── L0_libtorch_optimized_execution/
│ │ └── test.sh
│ ├── L0_libtorch_shared_weights/
│ │ ├── libtorch_shared_weights_test.py
│ │ └── test.sh
│ ├── L0_lifecycle/
│ │ ├── ensemble_zero_1_float32/
│ │ │ └── config.pbtxt
│ │ ├── identity_zero_1_int32/
│ │ │ └── config.pbtxt
│ │ ├── lifecycle_test.py
│ │ ├── retry_model/
│ │ │ └── 1/
│ │ │ └── model.py
│ │ └── test.sh
│ ├── L0_logging/
│ │ ├── log_format_test.py
│ │ ├── logging_endpoint_test.py
│ │ └── test.sh
│ ├── L0_long_running_stress/
│ │ ├── crashing_client.py
│ │ ├── scenarios.py
│ │ ├── stress.py
│ │ ├── stress_mail.py
│ │ └── test.sh
│ ├── L0_memory/
│ │ ├── client.py
│ │ └── test.sh
│ ├── L0_memory_growth/
│ │ ├── busy_op_test.py
│ │ ├── server_memory_mail.py
│ │ └── test.sh
│ ├── L0_metrics/
│ │ ├── cpu_metrics_test.py
│ │ ├── ensemble_decoupled/
│ │ │ ├── async_execute_decouple/
│ │ │ │ ├── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── config.pbtxt
│ │ │ └── ensemble/
│ │ │ └── config.pbtxt
│ │ ├── ensemble_delay/
│ │ │ └── config.pbtxt
│ │ ├── histogram_metrics_test.py
│ │ ├── identity_delay/
│ │ │ └── config.pbtxt
│ │ ├── metrics_config_test.py
│ │ ├── metrics_queue_size_test.py
│ │ ├── model_namespacing_repos/
│ │ │ ├── addsub_repo/
│ │ │ │ ├── addsub_ensemble/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ └── composing_model/
│ │ │ │ └── 1/
│ │ │ │ └── model.py
│ │ │ └── subadd_repo/
│ │ │ ├── composing_model/
│ │ │ │ └── 1/
│ │ │ │ └── model.py
│ │ │ └── subadd_ensemble/
│ │ │ └── config.pbtxt
│ │ ├── pinned_memory_metrics_test.py
│ │ ├── test.sh
│ │ └── unit_test_models/
│ │ ├── identity_cache_off/
│ │ │ └── config.pbtxt
│ │ └── identity_cache_on/
│ │ └── config.pbtxt
│ ├── L0_mlflow/
│ │ ├── plugin_test.py
│ │ └── test.sh
│ ├── L0_model_config/
│ │ ├── autofill_noplatform/
│ │ │ ├── common/
│ │ │ │ └── no_version/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── custom/
│ │ │ │ ├── no_delimiter/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ └── unknown_backend.unknown/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── ensemble/
│ │ │ │ ├── circular_dependency/
│ │ │ │ │ ├── circular_dependency/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── circular_dependency_2/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── expected_2
│ │ │ │ ├── ensemble_scheduling_no_set/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── has_backend/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── inconsistent_data_type/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── fp32_dim1_batch2/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── inconsistent_data_type/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── int32_dim1_batch4/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── inconsistent_shape/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── expected_2
│ │ │ │ │ ├── fp32_dim1_batch4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── fp32_dim3_batch4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── inconsistent_shape/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── instance_group_set/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── invalid_batch_size/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── fp32_dim1_batch2/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── fp32_dim1_batch4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── invalid_batch_size/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── invalid_decoupled_branching/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── int32_dim1_nobatch_output2/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── invalid_decoupled_branching/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── repeat_int32/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── invalid_decoupled_branching_2/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── invalid_decoupled_branching_2/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── repeat_int32/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── invalid_input_map/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── fp32_dim1_batch4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── fp32_dim1_batch4_input4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── fp32_dim1_batch4_output3/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── invalid_input_map/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── invalid_output_map/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── fp32_dim1_batch4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── fp32_dim1_batch4_input4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── fp32_dim1_batch4_output3/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── invalid_output_map/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── model_warm_up_set/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── no_input_map/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── no_model_name/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── no_output_map/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── no_required_version/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── no_required_version/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── simple/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── no_required_version_2/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── no_required_version_2/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── simple/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── no_required_version_3/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── no_required_version_3/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── simple/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── no_step/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── no_step_2/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── non_existing_model/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── fp32_dim1_batch4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── fp32_dim1_batch4_output3/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── non_existing_model/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── optimization_set/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── output_to_tensor_overmapped/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── redundant_tensor_as_input/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── expected_2
│ │ │ │ ├── redundant_tensor_as_output/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── self_circular_dependency/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── fp32_dim1_batch4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── fp32_dim1_batch4_input4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── fp32_dim1_batch4_output3/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── self_circular_dependency/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── tensor_to_input_overmapped/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── unmapped_input/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── fp32_dim1_batch4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── fp32_dim1_batch4_input4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── fp32_dim1_batch4_output3/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── unmapped_input/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── unreachable_input/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── unreachable_output/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── unreachable_output_2/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ └── unreachable_output_3/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── onnx/
│ │ │ │ ├── bad_input_dims/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.onnx
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── bad_max_batch_size/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.onnx
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── bad_output_dims/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.onnx
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── too_few_inputs/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.onnx
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── too_many_inputs/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.onnx
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── unknown_input/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.onnx
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ └── unknown_output/
│ │ │ │ ├── 1/
│ │ │ │ │ └── model.onnx
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── openvino/
│ │ │ │ ├── bad_input_dims/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── bad_output_dims/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── too_few_inputs/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── too_many_inputs/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── unknown_input/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ └── unknown_output/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── python/
│ │ │ │ ├── conflicting_max_batch_size/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ ├── conflicting_scheduler_sequence/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ ├── input_mismatch_datatype/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── input_mismatch_dims/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── input_missing_datatype/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ ├── input_missing_dims/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ ├── input_missing_name/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ ├── input_wrong_property/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ ├── model_transaction_policy_invalid_args/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ ├── model_transaction_policy_mismatch/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ ├── no_return/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ ├── output_mismatch_datatype/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── output_mismatch_dims/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── output_missing_datatype/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ ├── output_missing_dims/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ ├── output_missing_name/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ └── output_wrong_property/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── model.py
│ │ │ ├── pytorch/
│ │ │ │ ├── too_few_inputs/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ └── too_few_outputs/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ └── tensorrt/
│ │ │ ├── bad_dynamic_shapes_max/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── bad_dynamic_shapes_min/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── bad_input_dims/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── bad_input_non_linear_format_io/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── bad_input_shape/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── bad_input_shape_tensor/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── bad_input_type/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── bad_output_dims/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── bad_output_shape/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── bad_output_shape_tensor/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── bad_output_type/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── bad_outut_non_linear_format_io/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── mixed_batch_hint_dims/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── mixed_batch_hint_shape_values/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── too_few_inputs/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── too_many_inputs/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── unknown_input/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ └── unknown_output/
│ │ │ ├── config.pbtxt
│ │ │ └── expected
│ │ ├── autofill_noplatform_success/
│ │ │ ├── custom/
│ │ │ │ ├── empty_config.identity/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ └── no_backend.identity/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── ensemble/
│ │ │ │ ├── embedded_ensemble/
│ │ │ │ │ ├── embedded_ensemble/
│ │ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ │ └── expected
│ │ │ │ │ ├── fp32_dim1_batch4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── inner_ensemble/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ ├── inconsistent_shape/
│ │ │ │ │ ├── fp32_dim1_batch4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── fp32_dim2_nobatch/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── inconsistent_shape/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── inconsistent_shape_2/
│ │ │ │ │ ├── fp32_dim1_batch4/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ ├── fp32_dim2_nobatch/
│ │ │ │ │ │ └── config.pbtxt
│ │ │ │ │ └── inconsistent_shape_2/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ └── unmapped_output/
│ │ │ │ ├── fp32_dim1_batch4_output3/
│ │ │ │ │ └── config.pbtxt
│ │ │ │ └── unmapped_output/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── onnx/
│ │ │ │ ├── cpu_instance/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── empty_config/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.onnx
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── expected.1
│ │ │ │ │ ├── expected.2
│ │ │ │ │ └── expected.3
│ │ │ │ ├── no_config/
│ │ │ │ │ ├── 1/
│ │ │ │ │ │ └── model.onnx
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── expected.1
│ │ │ │ │ ├── expected.2
│ │ │ │ │ └── expected.3
│ │ │ │ └── no_config_no_batch/
│ │ │ │ ├── 1/
│ │ │ │ │ └── model.onnx
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ ├── expected.1
│ │ │ │ ├── expected.2
│ │ │ │ └── expected.3
│ │ │ ├── openvino/
│ │ │ │ ├── dynamic_batch/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── expected.1
│ │ │ │ │ ├── expected.2
│ │ │ │ │ └── expected.3
│ │ │ │ ├── empty_config/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── expected.1
│ │ │ │ │ ├── expected.2
│ │ │ │ │ └── expected.3
│ │ │ │ ├── no_config/
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── expected.1
│ │ │ │ │ ├── expected.2
│ │ │ │ │ └── expected.3
│ │ │ │ └── partial_config/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected.1
│ │ │ ├── python/
│ │ │ │ ├── conflicting_scheduler_ensemble/
│ │ │ │ │ ├── conflicting_scheduler_ensemble/
│ │ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ │ ├── expected
│ │ │ │ │ │ └── model.py
│ │ │ │ │ ├── ensemble_first_step/
│ │ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ │ └── model.py
│ │ │ │ │ └── ensemble_second_step/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── model.py
│ │ │ │ ├── dynamic_batching/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── expected.1
│ │ │ │ │ ├── expected.2
│ │ │ │ │ ├── expected.3
│ │ │ │ │ └── model.py
│ │ │ │ ├── dynamic_batching_no_op/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── expected.1
│ │ │ │ │ ├── expected.2
│ │ │ │ │ ├── expected.3
│ │ │ │ │ └── model.py
│ │ │ │ ├── empty_config/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── expected.1
│ │ │ │ │ ├── expected.2
│ │ │ │ │ └── expected.3
│ │ │ │ ├── incomplete_input/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ ├── incomplete_output/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ ├── model_transaction_policy/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── expected.1
│ │ │ │ │ ├── expected.2
│ │ │ │ │ ├── expected.3
│ │ │ │ │ └── model.py
│ │ │ │ ├── model_transaction_policy_decoupled_false/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── expected.1
│ │ │ │ │ ├── expected.2
│ │ │ │ │ ├── expected.3
│ │ │ │ │ └── model.py
│ │ │ │ ├── model_transaction_policy_no_op/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ ├── expected.1
│ │ │ │ │ ├── expected.2
│ │ │ │ │ ├── expected.3
│ │ │ │ │ └── model.py
│ │ │ │ ├── optional_input/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ ├── expected
│ │ │ │ │ └── model.py
│ │ │ │ ├── unknown_input/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ └── unknown_output/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── pytorch/
│ │ │ │ ├── cpu_instance/
│ │ │ │ │ ├── config.pbtxt
│ │ │ │ │ └── expected
│ │ │ │ └── no_name_platform/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ └── tensorrt/
│ │ │ ├── empty_config/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── empty_config_variable/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── hint_for_no_batch/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── incomplete_input/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ ├── expected.1
│ │ │ │ ├── expected.2
│ │ │ │ └── expected.3
│ │ │ ├── incomplete_output/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ ├── expected.1
│ │ │ │ ├── expected.2
│ │ │ │ └── expected.3
│ │ │ ├── multi_prof_max_bs/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── no_config/
│ │ │ │ └── expected
│ │ │ ├── no_config_non_linear_format_io/
│ │ │ │ └── expected
│ │ │ ├── no_config_shape_tensor/
│ │ │ │ └── expected
│ │ │ ├── no_config_variable/
│ │ │ │ └── expected
│ │ │ ├── no_name_platform/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── no_name_platform_variable/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ └── reshape_config_provided/
│ │ │ ├── config.pbtxt
│ │ │ └── expected
│ │ ├── cli_messages/
│ │ │ ├── cli_deprecation/
│ │ │ │ └── expected
│ │ │ └── cli_override/
│ │ │ └── expected
│ │ ├── compare_status.py
│ │ ├── custom_parameters/
│ │ │ └── tensorrt/
│ │ │ ├── invalid/
│ │ │ │ └── allocation_strategy_invalid_value/
│ │ │ │ ├── expected
│ │ │ │ └── partial.pbtxt
│ │ │ └── valid/
│ │ │ ├── allocation_strategy_no_key/
│ │ │ │ └── partial.pbtxt
│ │ │ ├── allocation_strategy_no_parameters/
│ │ │ │ └── partial.pbtxt
│ │ │ ├── allocation_strategy_value_1/
│ │ │ │ ├── expected
│ │ │ │ └── partial.pbtxt
│ │ │ └── allocation_strategy_value_2/
│ │ │ ├── expected
│ │ │ └── partial.pbtxt
│ │ ├── model_metrics/
│ │ │ ├── invalid_config/
│ │ │ │ ├── empty_buckets/
│ │ │ │ │ ├── expected
│ │ │ │ │ └── partial.pbtxt
│ │ │ │ ├── empty_metric_family/
│ │ │ │ │ ├── expected
│ │ │ │ │ └── partial.pbtxt
│ │ │ │ ├── no_buckets/
│ │ │ │ │ ├── expected
│ │ │ │ │ └── partial.pbtxt
│ │ │ │ ├── no_histogram_options/
│ │ │ │ │ ├── expected
│ │ │ │ │ └── partial.pbtxt
│ │ │ │ ├── no_metric_family/
│ │ │ │ │ ├── expected
│ │ │ │ │ └── partial.pbtxt
│ │ │ │ └── no_metric_identifier/
│ │ │ │ ├── expected
│ │ │ │ └── partial.pbtxt
│ │ │ ├── valid_config/
│ │ │ │ └── valid_model_metrics/
│ │ │ │ └── partial.pbtxt
│ │ │ └── valid_config_with_warn/
│ │ │ └── unknown_metric_family/
│ │ │ ├── expected
│ │ │ └── partial.pbtxt
│ │ ├── noautofill_platform/
│ │ │ ├── batch_input_less_source0/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── batch_input_less_source1/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── batch_input_less_source2/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── batch_input_less_source3/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── batch_input_many_source0/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── batch_input_many_source1/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── batch_input_many_source2/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── batch_input_many_source3/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── batch_input_unknown_source/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── batch_output_duplicated_target/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── batch_output_less_source/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── batch_output_many_source/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── batch_output_unknown_source/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── batch_output_unknown_target/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_unsupported
│ │ │ ├── control_kind_end_multiple/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── control_kind_ready_multiple/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── control_kind_start_multiple/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── control_tensor_multiple/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── control_tensor_no_value/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── default_priority_level0/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── default_priority_level1/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── ensemble_scheduling_set/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── invalid_cpu/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── invalid_gpu/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── missing_datatype/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── negative_gpu/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── negative_max_batch_size/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── preserve_ordering0/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── preserve_ordering1/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── preserve_ordering2/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── priority_level0/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── priority_level1/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_elementcount0/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_elementcount1/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_elementcount2/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_elementcount3/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_nobatch_empty0/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_nobatch_empty1/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_nobatch_variable0/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_nobatch_variable1/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_nobatch_variable2/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_nobatch_variable3/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_nobatch_variable4/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_nobatch_variable5/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_nobatch_zerodims0/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_nobatch_zerodims1/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_variable0/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_variable1/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_variable2/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_variable3/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_variable4/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_variable5/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_zerodims0/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── reshape_zerodims1/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── zerodims_input0/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── zerodims_input1/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ ├── zerodims_output0/
│ │ │ │ ├── config.pbtxt
│ │ │ │ ├── expected
│ │ │ │ └── expected_ensemble
│ │ │ └── zerodims_output1/
│ │ │ ├── config.pbtxt
│ │ │ ├── expected
│ │ │ └── expected_ensemble
│ │ ├── special_cases/
│ │ │ ├── invalid_platform/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ ├── invalid_runtime/
│ │ │ │ ├── config.pbtxt
│ │ │ │ └── expected
│ │ │ └── runtime_escape/
│ │ │ ├── config.pbtxt
│ │ │ └── expected
│ │ └── test.sh
│ ├── L0_model_namespacing/
│ │ ├── python_addsub/
│ │ │ └── __init__.py
│ │ ├── python_subadd/
│ │ │ └── __init__.py
│ │ ├── test.py
│ │ ├── test.sh
│ │ ├── test_duplication/
│ │ │ ├── addsub_repo/
│ │ │ │ ├── composing_model/
│ │ │ │ │ └── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── simple_addsub/
│ │ │ │ └── config.pbtxt
│ │ │ └── subadd_repo/
│ │ │ ├── composing_model/
│ │ │ │ └── 1/
│ │ │ │ └── model.py
│ │ │ └── simple_subadd/
│ │ │ └── config.pbtxt
│ │ ├── test_dynamic_resolution/
│ │ │ ├── addsub_repo/
│ │ │ │ ├── composing_model/
│ │ │ │ │ └── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── simple_addsub/
│ │ │ │ └── config.pbtxt
│ │ │ └── subadd_repo/
│ │ │ ├── composing_model/
│ │ │ │ └── 1/
│ │ │ │ └── model.py
│ │ │ └── simple_subadd/
│ │ │ └── config.pbtxt
│ │ ├── test_ensemble_duplication/
│ │ │ ├── addsub_repo/
│ │ │ │ ├── composing_addsub/
│ │ │ │ │ └── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── simple_ensemble/
│ │ │ │ └── config.pbtxt
│ │ │ └── subadd_repo/
│ │ │ ├── composing_subadd/
│ │ │ │ └── 1/
│ │ │ │ └── model.py
│ │ │ └── simple_ensemble/
│ │ │ └── config.pbtxt
│ │ └── test_no_duplication/
│ │ ├── addsub_repo/
│ │ │ ├── composing_addsub/
│ │ │ │ └── 1/
│ │ │ │ └── model.py
│ │ │ └── simple_addsub/
│ │ │ └── config.pbtxt
│ │ └── subadd_repo/
│ │ ├── composing_subadd/
│ │ │ └── 1/
│ │ │ └── model.py
│ │ └── simple_subadd/
│ │ └── config.pbtxt
│ ├── L0_model_queue/
│ │ ├── ensemble_zero_1_float32/
│ │ │ └── config.pbtxt
│ │ ├── model_queue_test.py
│ │ └── test.sh
│ ├── L0_model_update/
│ │ ├── instance_update_test.py
│ │ └── test.sh
│ ├── L0_multi_server/
│ │ └── test.sh
│ ├── L0_nan_inf/
│ │ ├── models/
│ │ │ └── nan_inf_output/
│ │ │ ├── 1/
│ │ │ │ └── model.py
│ │ │ └── config.pbtxt
│ │ ├── nan_inf_test.py
│ │ └── test.sh
│ ├── L0_nullchar_string/
│ │ ├── nullchar_string_client.py
│ │ └── test.sh
│ ├── L0_onnx_optimization/
│ │ └── test.sh
│ ├── L0_openai/
│ │ ├── generate_engine.py
│ │ └── test.sh
│ ├── L0_optional_input/
│ │ ├── models/
│ │ │ ├── ensemble_identity_2_float32/
│ │ │ │ └── config.pbtxt
│ │ │ ├── identity_2_float32/
│ │ │ │ └── config.pbtxt
│ │ │ ├── optional_connecting_tensor/
│ │ │ │ └── config.pbtxt
│ │ │ ├── optional_identity/
│ │ │ │ ├── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── config.pbtxt
│ │ │ └── pipeline_identity_2_float32/
│ │ │ └── config.pbtxt
│ │ ├── optional_input_test.py
│ │ └── test.sh
│ ├── L0_orca/
│ │ ├── orca_http_test.py
│ │ └── test.sh
│ ├── L0_output_name/
│ │ ├── output_name_test.py
│ │ └── test.sh
│ ├── L0_output_validation/
│ │ ├── lt_op_val_client.py
│ │ └── test.sh
│ ├── L0_parallel_copy/
│ │ ├── parallel_copy_test.py
│ │ └── test.sh
│ ├── L0_parameters/
│ │ ├── class_count_test.py
│ │ ├── model_repository/
│ │ │ ├── ensemble/
│ │ │ │ └── config.pbtxt
│ │ │ ├── identity/
│ │ │ │ └── config.pbtxt
│ │ │ └── parameter/
│ │ │ └── 1/
│ │ │ └── model.py
│ │ ├── parameters_test.py
│ │ └── test.sh
│ ├── L0_passive_instance/
│ │ ├── models/
│ │ │ └── distributed_int32_int32_int32/
│ │ │ └── config.pbtxt
│ │ ├── passive_instance_test.py
│ │ └── test.sh
│ ├── L0_perf_deeprecommender/
│ │ ├── run_test.sh
│ │ └── test.sh
│ ├── L0_perf_kaldi/
│ │ ├── create_data.sh
│ │ └── test.sh
│ ├── L0_perf_nomodel/
│ │ ├── custom_models/
│ │ │ └── custom_zero_1_float32/
│ │ │ └── config.pbtxt
│ │ ├── run_test.sh
│ │ └── test.sh
│ ├── L0_perf_pyclients/
│ │ ├── custom_models/
│ │ │ └── custom_zero_1_int32/
│ │ │ └── config.pbtxt
│ │ ├── simple_perf_client.py
│ │ └── test.sh
│ ├── L0_perf_resnet/
│ │ ├── run_test.sh
│ │ └── test.sh
│ ├── L0_perf_tensorrt_llm/
│ │ └── test.sh
│ ├── L0_perf_vllm/
│ │ └── test.sh
│ ├── L0_pinned_memory/
│ │ ├── libtorch_ensemble.pbtxt
│ │ └── test.sh
│ ├── L0_priority/
│ │ └── test.sh
│ ├── L0_python_api/
│ │ ├── test.sh
│ │ ├── test_kserve.py
│ │ ├── test_model_repository/
│ │ │ ├── delayed_identity/
│ │ │ │ ├── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── config.pbtxt
│ │ │ └── identity/
│ │ │ ├── 1/
│ │ │ │ └── model.py
│ │ │ └── config.pbtxt
│ │ └── testing_utils.py
│ ├── L0_python_client_unit_tests/
│ │ └── test.sh
│ ├── L0_pytorch_python_runtime/
│ │ ├── infer.py
│ │ ├── test.sh
│ │ └── unit_test.py
│ ├── L0_query/
│ │ ├── models/
│ │ │ └── query/
│ │ │ └── config.pbtxt
│ │ ├── query_e2e.py
│ │ └── test.sh
│ ├── L0_rate_limiter/
│ │ ├── rate_limiter_test.py
│ │ └── test.sh
│ ├── L0_register/
│ │ ├── config.pbtxt
│ │ └── test.sh
│ ├── L0_repoagent_checksum/
│ │ ├── identity_test.py
│ │ ├── models/
│ │ │ └── identity_int32/
│ │ │ ├── config.pbtxt
│ │ │ └── data_file
│ │ └── test.sh
│ ├── L0_request_cancellation/
│ │ ├── grpc_cancellation_test.py
│ │ ├── implicit_state_model/
│ │ │ ├── config.pbtxt
│ │ │ ├── gen_model.py
│ │ │ └── model.pt
│ │ ├── implicit_state_test.py
│ │ ├── scheduler_test.py
│ │ └── test.sh
│ ├── L0_response_cache/
│ │ ├── ensemble_cache_test.py
│ │ ├── generate_random_data.py
│ │ ├── models/
│ │ │ ├── decoupled_cache/
│ │ │ │ └── config.pbtxt
│ │ │ └── identity_cache/
│ │ │ └── config.pbtxt
│ │ └── test.sh
│ ├── L0_response_statistics/
│ │ ├── response_statistics_test.py
│ │ └── test.sh
│ ├── L0_sagemaker/
│ │ ├── sagemaker_generate_stream_test.py
│ │ ├── sagemaker_generate_test.py
│ │ ├── sagemaker_multi_model_test.py
│ │ ├── sagemaker_request_many_chunks.py
│ │ ├── sagemaker_test.py
│ │ └── test.sh
│ ├── L0_scalar_io/
│ │ ├── scalar_test.py
│ │ └── test.sh
│ ├── L0_sdk/
│ │ ├── grpc_test.cc
│ │ ├── http_test.cc
│ │ └── test.sh
│ ├── L0_secure_grpc/
│ │ └── test.sh
│ ├── L0_sequence_batcher/
│ │ ├── request_timeout_models/
│ │ │ └── custom_sequence_int32_timeout/
│ │ │ └── config.pbtxt
│ │ ├── sequence_batcher_test.py
│ │ └── test.sh
│ ├── L0_sequence_corrid_batcher/
│ │ ├── sequence_corrid_batcher_test.py
│ │ └── test.sh
│ ├── L0_sequence_stress/
│ │ ├── sequence_stress.py
│ │ └── test.sh
│ ├── L0_server_status/
│ │ ├── server_status_test.py
│ │ └── test.sh
│ ├── L0_shared_memory/
│ │ ├── shared_memory_test.py
│ │ └── test.sh
│ ├── L0_simple_ensemble/
│ │ ├── backpressure_test_models/
│ │ │ ├── decoupled_producer/
│ │ │ │ ├── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── config.pbtxt
│ │ │ └── ensemble_disabled_max_inflight_requests/
│ │ │ └── config.pbtxt
│ │ ├── ensemble_backpressure_test.py
│ │ ├── ensemble_test.py
│ │ ├── models/
│ │ │ ├── ensemble_add_sub_int32_int32_int32/
│ │ │ │ └── config.pbtxt
│ │ │ ├── ensemble_partial_add_sub/
│ │ │ │ └── config.pbtxt
│ │ │ ├── partial_add_sub/
│ │ │ │ ├── 1/
│ │ │ │ │ └── model.py
│ │ │ │ └── config.pbtxt
│ │ │ └── simple/
│ │ │ └── config.pbtxt
│ │ └── test.sh
│ ├── L0_simple_example/
│ │ └── test.sh
│ ├── L0_simple_go_client/
│ │ └── test.sh
│ ├── L0_simple_lib/
│ │ └── test.sh
│ ├── L0_simple_nodejs_client/
│ │ └── test.sh
│ ├── L0_socket/
│ │ ├── models/
│ │ │ └── simple/
│ │ │ └── config.pbtxt
│ │ └── test.sh
│ ├── L0_storage_S3/
│ │ └── test.sh
│ ├── L0_storage_S3_local/
│ │ ├── mock_s3_service.py
│ │ └── test.sh
│ ├── L0_storage_azure/
│ │ └── test.sh
│ ├── L0_storage_swiftstack/
│ │ ├── infer_test.py
│ │ └── test.sh
│ ├── L0_string_io/
│ │ ├── string_client_test.py
│ │ └── test.sh
│ ├── L0_trace/
│ │ ├── models/
│ │ │ └── input_all_required/
│ │ │ ├── 1/
│ │ │ │ └── model.py
│ │ │ └── config.pbtxt
│ │ ├── opentelemetry_unittest.py
│ │ ├── test.sh
│ │ ├── trace-config.yaml
│ │ ├── trace_context.py
│ │ ├── trace_endpoint_test.py
│ │ └── trace_stress_grpc_client.py
│ ├── L0_triton_repo_agent/
│ │ ├── models/
│ │ │ ├── chain_relocation/
│ │ │ │ └── config.pbtxt
│ │ │ └── relocation_sanity_check/
│ │ │ └── config.pbtxt
│ │ └── test.sh
│ ├── L0_trt_bf16_dtype/
│ │ ├── test.sh
│ │ └── trt_bf16_dtype_test.py
│ ├── L0_trt_compat/
│ │ ├── test.sh
│ │ └── trt_compatibility_test.py
│ ├── L0_trt_data_dependent_shape/
│ │ ├── test.sh
│ │ └── trt_data_dependent_shape_test.py
│ ├── L0_trt_dla/
│ │ ├── dla_test.py
│ │ └── test.sh
│ ├── L0_trt_dynamic_shape/
│ │ ├── test.sh
│ │ └── trt_dynamic_shape_test.py
│ ├── L0_trt_error_propagation/
│ │ ├── test.sh
│ │ └── trt_error_propagation_test.py
│ ├── L0_trt_plugin/
│ │ ├── test.sh
│ │ └── trt_plugin_test.py
│ ├── L0_trt_reformat_free/
│ │ ├── test.sh
│ │ └── trt_reformat_free_test.py
│ ├── L0_trt_shape_tensors/
│ │ ├── test.sh
│ │ └── trt_shape_tensor_test.py
│ ├── L0_vertex_ai/
│ │ ├── test.sh
│ │ └── vertex_ai_test.py
│ ├── L0_warmup/
│ │ ├── decoupled/
│ │ │ ├── 1/
│ │ │ │ └── model.py
│ │ │ └── config.pbtxt
│ │ ├── failing_infer/
│ │ │ ├── 1/
│ │ │ │ └── model.py
│ │ │ └── config.pbtxt
│ │ ├── raw_mug_data
│ │ └── test.sh
│ ├── common/
│ │ ├── busy_op_kernel.cu.cc
│ │ ├── check_copyright.py
│ │ ├── check_massif_log.py
│ │ ├── check_valgrind_log.py
│ │ ├── gen_common.py
│ │ ├── gen_ensemble_model_utils.py
│ │ ├── gen_jetson_trt_models
│ │ ├── gen_qa_custom_ops_models.py
│ │ ├── gen_qa_dyna_sequence_implicit_models.py
│ │ ├── gen_qa_dyna_sequence_models.py
│ │ ├── gen_qa_identity_models.py
│ │ ├── gen_qa_image_models.py
│ │ ├── gen_qa_implicit_models.py
│ │ ├── gen_qa_model_repository
│ │ ├── gen_qa_models.py
│ │ ├── gen_qa_ort_scalar_models.py
│ │ ├── gen_qa_pytorch_model.py
│ │ ├── gen_qa_ragged_models.py
│ │ ├── gen_qa_reshape_models.py
│ │ ├── gen_qa_sequence_models.py
│ │ ├── gen_qa_torchtrt_models.py
│ │ ├── gen_qa_trt_data_dependent_shape.py
│ │ ├── gen_qa_trt_format_models.py
│ │ ├── gen_qa_trt_plugin_models.py
│ │ ├── infer_test.py
│ │ ├── infer_util.py
│ │ ├── inferentia_perf_analyzer_input_data_json/
│ │ │ ├── non_aligned_validation_batched.json
│ │ │ ├── non_aligned_validation_no_batch.json
│ │ │ ├── simple_model.py
│ │ │ ├── validation_batched.json
│ │ │ ├── validation_no_batch.json
│ │ │ ├── wrong_validation_batched.json
│ │ │ └── wrong_validation_no_batch.json
│ │ ├── libtorch_infer_client.py
│ │ ├── nightly_email_helper.py
│ │ ├── orca_header_test.py
│ │ ├── perf_analyzer_input_data_json/
│ │ │ ├── float_data_with_shape.json
│ │ │ ├── image_data.json
│ │ │ ├── int_data.json
│ │ │ ├── int_data_diff_shape.json
│ │ │ ├── int_data_optional.json
│ │ │ ├── non_aligned_output.json
│ │ │ ├── output.json
│ │ │ ├── repeat_int32_data.json
│ │ │ ├── seq_data.json
│ │ │ ├── seq_output.json
│ │ │ ├── seq_wrong_output.json
│ │ │ ├── shape_tensor_data.json
│ │ │ ├── string_data.json
│ │ │ ├── string_data_with_shape.json
│ │ │ ├── wrong_output.json
│ │ │ └── wrong_output_2.json
│ │ ├── reporter.py
│ │ ├── resnet50_labels.txt
│ │ ├── run_all_tests.sh
│ │ ├── sequence_util.py
│ │ ├── shm_util.py
│ │ ├── show_testlogs
│ │ ├── test_util.py
│ │ ├── trace_summary.py
│ │ ├── trtllm_util.sh
│ │ └── util.sh
│ ├── custom_models/
│ │ ├── custom_dyna_sequence_int32/
│ │ │ └── config.pbtxt
│ │ ├── custom_sequence_int32/
│ │ │ └── config.pbtxt
│ │ └── custom_zero_1_float32/
│ │ └── config.pbtxt
│ ├── ensemble_models/
│ │ ├── batch_to_nobatch_float32_float32_float32/
│ │ │ └── config.pbtxt
│ │ ├── batch_to_nobatch_nobatch_float32_float32_float32/
│ │ │ └── config.pbtxt
│ │ ├── label_override_int32_float32_float32/
│ │ │ ├── config.pbtxt
│ │ │ └── output0_labels.txt
│ │ ├── mix_ensemble_int32_float32_float32/
│ │ │ └── config.pbtxt
│ │ ├── mix_nobatch_batch_float32_float32_float32/
│ │ │ └── config.pbtxt
│ │ ├── mix_platform_float32_float32_float32/
│ │ │ └── config.pbtxt
│ │ ├── mix_type_int32_float32_float32/
│ │ │ └── config.pbtxt
│ │ ├── nobatch_to_batch_float32_float32_float32/
│ │ │ └── config.pbtxt
│ │ ├── nobatch_to_batch_nobatch_float32_float32_float32/
│ │ │ └── config.pbtxt
│ │ └── wrong_label_int32_float32_float32/
│ │ ├── config.pbtxt
│ │ └── output0_labels.txt
│ ├── openvino_models/
│ │ ├── README.md
│ │ ├── dynamic_batch/
│ │ │ └── 1/
│ │ │ └── model.mapping
│ │ └── fixed_batch/
│ │ └── 1/
│ │ └── model.mapping
│ └── python_models/
│ ├── add_sub/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── add_sub_gpu/
│ │ └── config.pbtxt
│ ├── async_execute_decouple/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── async_execute_decouple_bls/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── auto_complete/
│ │ └── model.py
│ ├── auto_complete_error/
│ │ └── model.py
│ ├── bls/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── bls_async/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── bls_finalize_error/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── bls_init_error/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── bls_memory/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── bls_memory_async/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── bls_model_loading/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── bls_onnx_warmup/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── bls_parameters/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── bls_request_rescheduling/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── bls_simple/
│ │ └── bls_simple.py
│ ├── bls_undefined/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── busy_op/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── cuda_memory_consumer/
│ │ ├── 1/
│ │ │ └── model.py
│ │ └── config.pbtxt
│ ├── custom_metrics/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── delayed_model/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── dlpack_add_sub/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── dlpack_empty_output/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── dlpack_identity/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── dlpack_io_identity/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── dlpack_io_identity_decoupled/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── dlpack_square/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── dlpack_sub_add/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── dlpack_test/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── ensemble/
│ │ └── config.pbtxt
│ ├── ensemble_gpu/
│ │ └── config.pbtxt
│ ├── ensemble_io/
│ │ └── config.pbtxt
│ ├── error_code/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── execute_cancel/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── execute_delayed_model/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── execute_error/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── execute_grpc_error/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── execute_return_error/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── fan_add_sub/
│ │ └── config.pbtxt
│ ├── fini_error/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── generate_models/
│ │ └── mock_llm/
│ │ ├── 1/
│ │ │ └── model.py
│ │ └── config.pbtxt
│ ├── ground_truth/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── identity_bf16/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── identity_fp32/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── identity_fp32_logging/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── identity_fp32_timeout/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── init_args/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── init_error/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── init_exit/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── iterative_sequence/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── model_env/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── model_init_del/
│ │ ├── config.pbtxt
│ │ ├── model.py
│ │ └── util.py
│ ├── multi_file/
│ │ ├── file1.py
│ │ ├── file2.py
│ │ └── model.py
│ ├── non_contiguous/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── optional/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── python_based_backends/
│ │ └── add_sub_backend/
│ │ └── model.py
│ ├── python_version/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── pytorch_fp32_fp32/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── request_rescheduling_addsub/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── response_parameters/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── response_parameters_bls/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── response_parameters_decoupled/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── response_sender/
│ │ ├── config.pbtxt
│ │ ├── model.py
│ │ ├── model_async.py
│ │ └── model_common.py
│ ├── response_sender_complete_final/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── response_sender_error/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── response_sender_until_cancelled/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── sequence_int32/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── sequence_py/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── simple_identity_fp32/
│ │ └── config.pbtxt
│ ├── string/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── string_fixed/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── string_identity/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── sub_add/
│ │ └── model.py
│ ├── torchvision/
│ │ └── resnet50/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── variable_gpu_output/
│ │ ├── config.pbtxt
│ │ └── model.py
│ ├── wrong_model/
│ │ ├── config.pbtxt
│ │ └── model.py
│ └── wrong_return_type/
│ ├── config.pbtxt
│ └── model.py
├── src/
│ ├── CMakeLists.txt
│ ├── classification.cc
│ ├── classification.h
│ ├── command_line_parser.cc
│ ├── command_line_parser.h
│ ├── common.cc
│ ├── common.h
│ ├── data_compressor.h
│ ├── grpc/
│ │ ├── CMakeLists.txt
│ │ ├── grpc_handler.h
│ │ ├── grpc_server.cc
│ │ ├── grpc_server.h
│ │ ├── grpc_utils.cc
│ │ ├── grpc_utils.h
│ │ ├── infer_handler.cc
│ │ ├── infer_handler.h
│ │ ├── stream_infer_handler.cc
│ │ └── stream_infer_handler.h
│ ├── http_server.cc
│ ├── http_server.h
│ ├── main.cc
│ ├── memory_alloc.cc
│ ├── multi_server.cc
│ ├── orca_http.cc
│ ├── orca_http.h
│ ├── python/
│ │ ├── CMakeLists.txt
│ │ ├── build_wheel.py
│ │ ├── examples/
│ │ │ ├── example.py
│ │ │ └── example_model_repository/
│ │ │ └── identity/
│ │ │ ├── 1/
│ │ │ │ └── model.onnx
│ │ │ └── config.pbtxt
│ │ ├── setup.py
│ │ └── tritonfrontend/
│ │ ├── CMakeLists.txt
│ │ ├── __init__.py
│ │ ├── __init__.pyi
│ │ ├── _api/
│ │ │ ├── __init__.py
│ │ │ ├── _error_mapping.py
│ │ │ ├── _kservegrpc.py
│ │ │ ├── _kservegrpc.pyi
│ │ │ ├── _kservehttp.py
│ │ │ ├── _kservehttp.pyi
│ │ │ ├── _metrics.py
│ │ │ └── _metrics.pyi
│ │ ├── _c/
│ │ │ ├── __init__.py
│ │ │ ├── __init__.pyi
│ │ │ ├── tritonfrontend.h
│ │ │ ├── tritonfrontend_bindings.pyi
│ │ │ └── tritonfrontend_pybind.cc
│ │ └── py.typed
│ ├── restricted_features.h
│ ├── sagemaker_server.cc
│ ├── sagemaker_server.h
│ ├── shared_memory_manager.cc
│ ├── shared_memory_manager.h
│ ├── simple.cc
│ ├── test/
│ │ ├── CMakeLists.txt
│ │ ├── data_compressor_test.cc
│ │ ├── distributed_addsub/
│ │ │ ├── CMakeLists.txt
│ │ │ ├── cmake/
│ │ │ │ └── TritonDistributedAddsubBackendConfig.cmake.in
│ │ │ └── src/
│ │ │ ├── distributed_addsub.cc
│ │ │ └── libtriton_distributed_addsub.ldscript
│ │ ├── dyna_sequence/
│ │ │ ├── CMakeLists.txt
│ │ │ ├── cmake/
│ │ │ │ └── TritonDynaSequenceBackendConfig.cmake.in
│ │ │ └── src/
│ │ │ ├── dyna_sequence.cc
│ │ │ └── libtriton_dyna_sequence.ldscript
│ │ ├── implicit_state/
│ │ │ ├── CMakeLists.txt
│ │ │ ├── cmake/
│ │ │ │ └── TritonImplicitStateBackendConfig.cmake.in
│ │ │ └── src/
│ │ │ ├── implicit_state.cc
│ │ │ └── libtriton_implicit_state.ldscript
│ │ ├── iterative_sequence/
│ │ │ ├── CMakeLists.txt
│ │ │ ├── cmake/
│ │ │ │ └── TritonIterativeSequenceBackendConfig.cmake.in
│ │ │ └── src/
│ │ │ ├── iterative_sequence.cc
│ │ │ └── libtriton_iterative_sequence.ldscript
│ │ ├── models/
│ │ │ ├── identity_fp32/
│ │ │ │ └── config.pbtxt
│ │ │ ├── repeat_int32/
│ │ │ │ └── config.pbtxt
│ │ │ └── square_int32/
│ │ │ └── config.pbtxt
│ │ ├── query_backend/
│ │ │ ├── CMakeLists.txt
│ │ │ ├── cmake/
│ │ │ │ └── TritonQueryBackendConfig.cmake.in
│ │ │ └── src/
│ │ │ ├── libtriton_query.ldscript
│ │ │ └── query.cc
│ │ ├── repoagent/
│ │ │ └── relocation_repoagent/
│ │ │ ├── CMakeLists.txt
│ │ │ ├── cmake/
│ │ │ │ └── TritonRelocationRepoAgentConfig.cmake.in
│ │ │ └── src/
│ │ │ ├── libtritonrepoagent_relocation.ldscript
│ │ │ └── relocation.cc
│ │ ├── sequence/
│ │ │ ├── CMakeLists.txt
│ │ │ ├── cmake/
│ │ │ │ └── TritonSequenceBackendConfig.cmake.in
│ │ │ └── src/
│ │ │ ├── libtriton_sequence.ldscript
│ │ │ └── sequence.cc
│ │ └── tensor_size_test.cc
│ ├── tracer.cc
│ ├── tracer.h
│ ├── triton_signal.cc
│ ├── triton_signal.h
│ ├── vertex_ai_server.cc
│ └── vertex_ai_server.h
└── tools/
└── add_copyright.py
Showing preview only (308K chars total). Download the full file or copy to clipboard to get everything.
SYMBOL INDEX (3571 symbols across 405 files)
FILE: build.py
function log (line 96) | def log(msg, force=False):
function log_verbose (line 104) | def log_verbose(msg):
function fail (line 109) | def fail(msg):
function fail_if (line 113) | def fail_if(p, msg):
function target_platform (line 119) | def target_platform():
function target_machine (line 136) | def target_machine():
function container_versions (line 143) | def container_versions(version, container_version, upstream_container_ve...
class BuildScript (line 151) | class BuildScript:
method __init__ (line 154) | def __init__(self, filepath, desc=None, verbose=False):
method __enter__ (line 160) | def __enter__(self):
method __exit__ (line 163) | def __exit__(self, type, value, traceback):
method __del__ (line 166) | def __del__(self):
method close (line 169) | def close(self):
method blankln (line 184) | def blankln(self):
method commentln (line 187) | def commentln(self, cnt):
method comment (line 190) | def comment(self, msg=""):
method comment_verbose (line 200) | def comment_verbose(self, msg=""):
method header (line 204) | def header(self, desc=None):
method envvar_ref (line 233) | def envvar_ref(self, v):
method cmd (line 238) | def cmd(self, clist, check_exitcode=False):
method cwd (line 255) | def cwd(self, path):
method cp (line 261) | def cp(self, src, dest):
method mkdir (line 267) | def mkdir(self, path):
method rmdir (line 275) | def rmdir(self, path):
method cpdir (line 283) | def cpdir(self, src, dest):
method tar (line 289) | def tar(self, subdir, tar_filename):
method cmake (line 295) | def cmake(self, args):
method makeinstall (line 302) | def makeinstall(self, target="install"):
method gitclone (line 308) | def gitclone(self, repo, tag, subdir, org):
function cmake_core_arg (line 341) | def cmake_core_arg(name, type, value):
function cmake_core_enable (line 353) | def cmake_core_enable(name, flag):
function cmake_core_extra_args (line 364) | def cmake_core_extra_args():
function cmake_backend_arg (line 371) | def cmake_backend_arg(backend, name, type, value):
function cmake_backend_enable (line 384) | def cmake_backend_enable(backend, name, flag):
function cmake_backend_extra_args (line 397) | def cmake_backend_extra_args(backend):
function cmake_repoagent_arg (line 405) | def cmake_repoagent_arg(name, type, value):
function cmake_repoagent_enable (line 414) | def cmake_repoagent_enable(name, flag):
function cmake_repoagent_extra_args (line 420) | def cmake_repoagent_extra_args():
function cmake_cache_arg (line 426) | def cmake_cache_arg(name, type, value):
function cmake_cache_enable (line 435) | def cmake_cache_enable(name, flag):
function cmake_cache_extra_args (line 441) | def cmake_cache_extra_args():
function core_cmake_args (line 447) | def core_cmake_args(components, backends, cmake_dir, install_dir):
function repoagent_repo (line 507) | def repoagent_repo(ra):
function repoagent_cmake_args (line 511) | def repoagent_cmake_args(images, components, ra, install_dir):
function cache_repo (line 530) | def cache_repo(cache):
function cache_cmake_args (line 535) | def cache_cmake_args(images, components, cache, install_dir):
function backend_repo (line 554) | def backend_repo(be):
function backend_cmake_args (line 558) | def backend_cmake_args(images, components, be, install_dir, library_paths):
function python_cmake_args (line 632) | def python_cmake_args():
function pytorch_cmake_args (line 644) | def pytorch_cmake_args(images):
function onnxruntime_cmake_args (line 670) | def onnxruntime_cmake_args(images, library_paths):
function openvino_cmake_args (line 751) | def openvino_cmake_args():
function tensorrt_cmake_args (line 786) | def tensorrt_cmake_args():
function dali_cmake_args (line 800) | def dali_cmake_args():
function fil_cmake_args (line 806) | def fil_cmake_args(images):
function armnn_tflite_cmake_args (line 825) | def armnn_tflite_cmake_args():
function fastertransformer_cmake_args (line 831) | def fastertransformer_cmake_args():
function tensorrtllm_cmake_args (line 842) | def tensorrtllm_cmake_args(images):
function install_dcgm_libraries (line 848) | def install_dcgm_libraries(dcgm_version, target_machine):
function create_dockerfile_buildbase_rhel (line 918) | def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap):
function create_dockerfile_buildbase (line 1035) | def create_dockerfile_buildbase(ddir, dockerfile_name, argmap):
function create_dockerfile_cibase (line 1175) | def create_dockerfile_cibase(ddir, dockerfile_name, argmap):
function create_dockerfile_linux (line 1205) | def create_dockerfile_linux(
function dockerfile_prepare_container_linux (line 1292) | def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, tar...
function add_cpu_libs_to_linux_dockerfile (line 1519) | def add_cpu_libs_to_linux_dockerfile(backends, target_machine):
function change_default_python_version_rhel (line 1581) | def change_default_python_version_rhel(version):
function create_dockerfile_windows (line 1605) | def create_dockerfile_windows(
function create_build_dockerfiles (line 1654) | def create_build_dockerfiles(
function create_docker_build_script (line 1737) | def create_docker_build_script(script_name, container_install_dir, conta...
function core_build (line 1916) | def core_build(
function tensorrtllm_prebuild (line 2015) | def tensorrtllm_prebuild(cmake_script):
function tensorrtllm_postbuild (line 2024) | def tensorrtllm_postbuild(cmake_script, repo_install_dir, tensorrtllm_be...
function backend_build (line 2040) | def backend_build(
function backend_clone (line 2105) | def backend_clone(
function repo_agent_build (line 2142) | def repo_agent_build(
function cache_build (line 2175) | def cache_build(cache, cmake_script, build_dir, install_dir, cache_repo,...
function cibase_build (line 2206) | def cibase_build(
function finalize_build (line 2313) | def finalize_build(cmake_script, install_dir, ci_dir):
function enable_all (line 2318) | def enable_all():
FILE: compose.py
function log (line 37) | def log(msg, force=False):
function log_verbose (line 45) | def log_verbose(msg):
function fail (line 50) | def fail(msg):
function fail_if (line 55) | def fail_if(p, msg):
function start_dockerfile (line 60) | def start_dockerfile(ddir, images, argmap, dockerfile_name, backends):
function add_requested_backends (line 112) | def add_requested_backends(ddir, dockerfile_name, backends):
function add_requested_repoagents (line 128) | def add_requested_repoagents(ddir, dockerfile_name, repoagents):
function add_requested_caches (line 144) | def add_requested_caches(ddir, dockerfile_name, caches):
function end_dockerfile (line 160) | def end_dockerfile(ddir, dockerfile_name, argmap):
function build_docker_image (line 172) | def build_docker_image(ddir, dockerfile_name, container_name):
function get_container_version_if_not_specified (line 189) | def get_container_version_if_not_specified():
function create_argmap (line 203) | def create_argmap(images, skip_pull):
FILE: deploy/gke-marketplace-app/client-sample/locustfile_bert.py
class ProfileLoad (line 34) | class ProfileLoad(LoadTestShape):
method tick (line 46) | def tick(self):
class TritonUser (line 60) | class TritonUser(HttpUser):
method bert (line 64) | def bert(self):
method on_start (line 67) | def on_start(self):
FILE: deploy/mlflow-triton-plugin/mlflow_triton/config.py
class Config (line 35) | class Config(dict):
method __init__ (line 36) | def __init__(self):
method parse_path (line 66) | def parse_path(self, path):
method clean_path (line 97) | def clean_path(self, s3_path):
FILE: deploy/mlflow-triton-plugin/mlflow_triton/deployments.py
class TritonPlugin (line 55) | class TritonPlugin(BaseDeploymentClient):
method __init__ (line 56) | def __init__(self, uri):
method _get_triton_server_config (line 76) | def _get_triton_server_config(self):
method create_deployment (line 89) | def create_deployment(self, name, model_uri, flavor=None, config=None):
method delete_deployment (line 124) | def delete_deployment(self, name):
method update_deployment (line 148) | def update_deployment(self, name, model_uri=None, flavor=None, config=...
method list_deployments (line 186) | def list_deployments(self):
method get_deployment (line 225) | def get_deployment(self, name):
method predict (line 240) | def predict(self, deployment_name, df):
method _generate_mlflow_meta_file (line 287) | def _generate_mlflow_meta_file(self, name, flavor, model_uri):
method _get_mlflow_meta_dict (line 312) | def _get_mlflow_meta_dict(self, name):
method _get_copy_paths (line 335) | def _get_copy_paths(self, artifact_path, name, flavor):
method _walk (line 393) | def _walk(self, path):
method _copy_files_to_triton_repo (line 407) | def _copy_files_to_triton_repo(self, artifact_path, name, flavor):
method _delete_mlflow_meta (line 456) | def _delete_mlflow_meta(self, filepath):
method _delete_deployment_files (line 465) | def _delete_deployment_files(self, name):
method _validate_config_args (line 505) | def _validate_config_args(self, config):
method _validate_flavor (line 515) | def _validate_flavor(self, flavor):
method _validate_model_name (line 519) | def _validate_model_name(self, name):
method _model_exists (line 531) | def _model_exists(self, name):
function run_local (line 540) | def run_local(name, model_uri, flavor=None, config=None):
function target_help (line 544) | def target_help():
FILE: deploy/mlflow-triton-plugin/scripts/publish_model_to_mlflow.py
function publish_to_mlflow (line 52) | def publish_to_mlflow(model_name, model_directory, flavor):
FILE: deploy/mlflow-triton-plugin/scripts/triton_flavor.py
function save_model (line 52) | def save_model(
function log_model (line 85) | def log_model(
FILE: docs/conf.py
function setup_logger (line 70) | def setup_logger(name, log_file, level=logging.INFO, max_bytes=1048576, ...
function ultimateReplace (line 258) | def ultimateReplace(app, docname, source):
function setup (line 365) | def setup(app):
FILE: docs/examples/jetson/concurrency_and_dynamic_batching/people_detection.cc
function Usage (line 79) | void
function TRITONSERVER_Error (line 105) | TRITONSERVER_Error*
function TRITONSERVER_Error (line 205) | TRITONSERVER_Error*
function InferRequestComplete (line 260) | void
function InferResponseComplete (line 267) | void
function TRITONSERVER_Error (line 281) | TRITONSERVER_Error*
function ResizeKeepAspectRatio (line 315) | cv::Mat
function SaveOverlay (line 352) | void
function Normalize (line 395) | void
function RecoverBoundingBoxes (line 410) | void
function ParseDetections (line 489) | void
function DetectionInferenceOutput (line 569) | void
function SetServerOptions (line 604) | void
function CheckServerLiveAndReady (line 654) | void
function PrintServerStatus (line 681) | void
function AwaitModelReady (line 704) | void
function LoadInputImageFromFile (line 770) | void
function LoadInputData (line 796) | void
function RunInferenceAndValidate (line 813) | void
function PrintModelStats (line 945) | void
function CreateAndRunTritonserverInstance (line 972) | void
function main (line 1071) | int
FILE: docs/examples/model_repository/simple_identity/1/model.py
class TritonPythonModel (line 32) | class TritonPythonModel:
method initialize (line 35) | def initialize(self, args):
method execute (line 38) | def execute(self, requests):
FILE: docs/generate_docs.py
function setup_logger (line 71) | def setup_logger(name, log_file, level=logging.INFO, max_bytes=1048576, ...
function run_command (line 136) | def run_command(command):
function clone_from_github (line 149) | def clone_from_github(repo, tag, org):
function is_excluded (line 166) | def is_excluded(file_path):
function get_git_repo_name (line 176) | def get_git_repo_name(file_path):
function replace_url_with_relpath (line 191) | def replace_url_with_relpath(url, src_doc_path):
function replace_relpath_with_url (line 259) | def replace_relpath_with_url(relpath, src_doc_path):
function replace_hyperlink (line 315) | def replace_hyperlink(m, src_doc_path):
function preprocess_docs (line 329) | def preprocess_docs(exclude_paths=None):
function main (line 353) | def main():
FILE: python/openai/openai_frontend/engine/engine.py
class LLMEngine (line 43) | class LLMEngine(Protocol):
method ready (line 52) | def ready(self) -> bool:
method metrics (line 58) | def metrics(self) -> str:
method models (line 64) | def models(self) -> List[Model]:
method chat (line 70) | def chat(
method completion (line 84) | def completion(
method embedding (line 98) | def embedding(self, request: CreateEmbeddingRequest) -> CreateEmbeddin...
FILE: python/openai/openai_frontend/engine/triton_engine.py
class TritonModelMetadata (line 101) | class TritonModelMetadata:
class TritonLLMEngine (line 121) | class TritonLLMEngine(LLMEngine):
method __init__ (line 122) | def __init__(
method ready (line 151) | def ready(self) -> bool:
method metrics (line 154) | def metrics(self) -> str:
method models (line 157) | def models(self) -> List[Model]:
method chat (line 183) | async def chat(
method _get_chat_completion_response_message (line 288) | def _get_chat_completion_response_message(
method completion (line 338) | async def completion(
method embedding (line 399) | async def embedding(
method _get_embedding (line 441) | def _get_embedding(
method _get_first_response_role (line 451) | def _get_first_response_role(
method _determine_request_converter (line 460) | def _determine_request_converter(self, backend: str, request_type: Req...
method _get_model_and_lora_name (line 479) | def _get_model_and_lora_name(self, request_model_name: str):
method _get_tokenizer (line 489) | def _get_tokenizer(self, tokenizer_name: str):
method _get_model_metadata (line 496) | def _get_model_metadata(self) -> Dict[str, TritonModelMetadata]:
method _get_streaming_chat_response_chunk (line 544) | def _get_streaming_chat_response_chunk(
method _get_first_streaming_chat_response (line 562) | def _get_first_streaming_chat_response(
method _streaming_chat_iterator (line 579) | async def _streaming_chat_iterator(
method _get_streaming_response_delta (line 677) | def _get_streaming_response_delta(
method _validate_chat_request (line 781) | def _validate_chat_request(
method _verify_chat_tool_call_settings (line 844) | def _verify_chat_tool_call_settings(self, request: CreateChatCompletio...
method _streaming_completion_iterator (line 881) | async def _streaming_completion_iterator(
method _validate_completion_request (line 949) | def _validate_completion_request(
method _validate_embedding_request (line 1025) | def _validate_embedding_request(
method _should_stream_with_auto_tool_parsing (line 1051) | def _should_stream_with_auto_tool_parsing(
method _should_check_for_unstreamed_tool_arg_tokens (line 1061) | def _should_check_for_unstreamed_tool_arg_tokens(
method _get_named_function_name (line 1074) | def _get_named_function_name(
method _get_lora_config (line 1094) | def _get_lora_config(
FILE: python/openai/openai_frontend/engine/utils/chat.py
class ConversationMessage (line 44) | class ConversationMessage(TypedDict, total=False):
function _frontend_schema_to_openai_schema_completion_tool_call (line 61) | def _frontend_schema_to_openai_schema_completion_tool_call(
function _parse_chat_message_content_parts (line 74) | def _parse_chat_message_content_parts(
function _parse_chat_message_content (line 91) | def _parse_chat_message_content(
function _postprocess_messages (line 128) | def _postprocess_messages(messages: List[ConversationMessage]) -> None:
function parse_chat_messages (line 146) | def parse_chat_messages(
function load_chat_template (line 163) | def load_chat_template(chat_template) -> Optional[str]:
FILE: python/openai/openai_frontend/engine/utils/tokenizer.py
function get_cached_tokenizer (line 38) | def get_cached_tokenizer(
function get_tokenizer (line 76) | def get_tokenizer(
FILE: python/openai/openai_frontend/engine/utils/tool_call_parsers/llama_tool_call_parser.py
class Llama3JsonToolParser (line 55) | class Llama3JsonToolParser(ToolCallParser):
method __init__ (line 56) | def __init__(self, tokenizer: AnyTokenizer):
method parse_tool_calls (line 70) | def parse_tool_calls(
method parse_tool_calls_streaming (line 137) | def parse_tool_calls_streaming(
FILE: python/openai/openai_frontend/engine/utils/tool_call_parsers/mistral_tool_call_parser.py
function generate_mistral_random_id (line 58) | def generate_mistral_random_id():
class MistralToolParser (line 65) | class MistralToolParser(ToolCallParser):
method __init__ (line 66) | def __init__(self, tokenizer: AnyTokenizer):
method parse_tool_calls (line 80) | def parse_tool_calls(
method parse_tool_calls_streaming (line 144) | def parse_tool_calls_streaming(
FILE: python/openai/openai_frontend/engine/utils/tool_call_parsers/tool_call_parser.py
class ToolCallParser (line 39) | class ToolCallParser:
method __init__ (line 45) | def __init__(self, tokenizer: AnyTokenizer):
method parse_tool_calls (line 54) | def parse_tool_calls(
method parse_tool_calls_streaming (line 61) | def parse_tool_calls_streaming(
class ToolParserManager (line 69) | class ToolParserManager:
method get_tool_parser_cls (line 73) | def get_tool_parser_cls(cls, name) -> type:
method _register_module (line 80) | def _register_module(
method register_module (line 103) | def register_module(
FILE: python/openai/openai_frontend/engine/utils/tool_call_parsers/utils.py
function partial_json_loads (line 40) | def partial_json_loads(input_str: str, flags: Allow) -> tuple[Any, int]:
function is_complete_json (line 50) | def is_complete_json(input_str: str) -> bool:
function find_common_prefix (line 58) | def find_common_prefix(s1: str, s2: str) -> str:
function find_common_suffix (line 81) | def find_common_suffix(s1: str, s2: str) -> str:
function extract_intermediate_diff (line 99) | def extract_intermediate_diff(curr: str, old: str) -> str:
FILE: python/openai/openai_frontend/engine/utils/triton.py
class RequestKind (line 55) | class RequestKind(Enum):
class TritonLoraConfig (line 61) | class TritonLoraConfig:
function _create_vllm_generate_request (line 70) | def _create_vllm_generate_request(
function _create_trtllm_generate_request (line 179) | def _create_trtllm_generate_request(
function _create_vllm_embedding_request (line 252) | def _create_vllm_embedding_request(
function _create_trtllm_embedding_request (line 272) | def _create_trtllm_embedding_request(
function _construct_string_from_pointer (line 281) | def _construct_string_from_pointer(pointer: int, size: int) -> str:
function _get_volume (line 294) | def _get_volume(shape: Iterable[int]) -> int:
function _to_string (line 302) | def _to_string(tensor: tritonserver.Tensor) -> str:
class _StreamingUsageAccumulator (line 324) | class _StreamingUsageAccumulator:
method update (line 332) | def update(self, response: tritonserver.InferenceResponse):
method get_final_usage (line 342) | def get_final_usage(self) -> Optional[CompletionUsage]:
function _get_usage_from_response (line 357) | def _get_usage_from_response(
function _get_output (line 415) | def _get_output(response: tritonserver._api._response.InferenceResponse)...
function _get_logprobs_from_response (line 429) | def _get_logprobs_from_response(
function _get_openai_chat_format_logprobs_from_vllm_response (line 468) | def _get_openai_chat_format_logprobs_from_vllm_response(
function _get_openai_completion_format_logprobs_from_vllm_response (line 526) | def _get_openai_completion_format_logprobs_from_vllm_response(
function _validate_triton_responses_non_streaming (line 584) | def _validate_triton_responses_non_streaming(
function _get_guided_json_from_tool (line 597) | def _get_guided_json_from_tool(
function _validate_lora_path_trtllm (line 620) | def _validate_lora_path_trtllm(repo_path: str, lora_path: str, lora_name...
function _parse_lora_configs (line 648) | def _parse_lora_configs(
FILE: python/openai/openai_frontend/frontend/fastapi/middleware/api_restriction.py
class RestrictedFeatures (line 45) | class RestrictedFeatures:
method __init__ (line 54) | def __init__(self, args: list[str]):
method ParseRestrictedFeatureOption (line 66) | def ParseRestrictedFeatureOption(self, args):
method RestrictionDict (line 93) | def RestrictionDict(self) -> dict[str, tuple[str, str]]:
method Insert (line 102) | def Insert(self, api: str, restriction: tuple[str, str]):
method IsRestricted (line 112) | def IsRestricted(self, api: str) -> bool:
class APIRestrictionMiddleware (line 125) | class APIRestrictionMiddleware(BaseHTTPMiddleware):
method __init__ (line 136) | def __init__(self, app, restricted_apis: RestrictedFeatures):
method _get_auth_header (line 147) | def _get_auth_header(self, request: Request) -> tuple[str, str] | None:
method dispatch (line 167) | async def dispatch(self, request: Request, call_next):
method _check_authentication (line 203) | def _check_authentication(self, request: Request, auth_header: tuple[s...
FILE: python/openai/openai_frontend/frontend/fastapi/routers/chat.py
function create_chat_completion (line 40) | async def create_chat_completion(
FILE: python/openai/openai_frontend/frontend/fastapi/routers/completions.py
function create_completion (line 40) | async def create_completion(
FILE: python/openai/openai_frontend/frontend/fastapi/routers/embeddings.py
function create_embedding (line 40) | async def create_embedding(
FILE: python/openai/openai_frontend/frontend/fastapi/routers/models.py
function list_models (line 39) | def list_models(request: Request) -> ListModelsResponse:
function retrieve_model (line 53) | def retrieve_model(request: Request, model_name: str) -> Model:
FILE: python/openai/openai_frontend/frontend/fastapi/routers/observability.py
function metrics (line 35) | def metrics(request: Request) -> PlainTextResponse:
function ready (line 40) | def ready(request: Request) -> Response:
FILE: python/openai/openai_frontend/frontend/fastapi_frontend.py
class FastApiFrontend (line 47) | class FastApiFrontend(OpenAIFrontend):
method __init__ (line 48) | def __init__(
method __del__ (line 71) | def __del__(self):
method start (line 74) | def start(self):
method stop (line 85) | def stop(self):
method _create_app (line 89) | def _create_app(self):
method _add_cors_middleware (line 115) | def _add_cors_middleware(self, app: FastAPI):
method _add_api_restriction_middleware (line 131) | def _add_api_restriction_middleware(self, app: FastAPI):
FILE: python/openai/openai_frontend/frontend/frontend.py
class OpenAIFrontend (line 32) | class OpenAIFrontend(Protocol):
method start (line 33) | def start(self) -> None:
method stop (line 39) | def stop(self) -> None:
FILE: python/openai/openai_frontend/main.py
function signal_handler (line 39) | def signal_handler(
function shutdown (line 47) | def shutdown(server, openai_frontend, kserve_http, kserve_grpc):
function start_kserve_frontends (line 63) | def start_kserve_frontends(server, args):
function parse_args (line 86) | def parse_args():
function main (line 199) | def main():
FILE: python/openai/openai_frontend/schemas/openai.py
class Error (line 39) | class Error(BaseModel):
class ErrorResponse (line 46) | class ErrorResponse(BaseModel):
class Object (line 50) | class Object(Enum):
class DeleteModelResponse (line 54) | class DeleteModelResponse(BaseModel):
class Model1 (line 60) | class Model1(Enum):
class PromptItem (line 66) | class PromptItem(RootModel):
class CreateCompletionRequest (line 70) | class CreateCompletionRequest(BaseModel):
class FinishReason (line 162) | class FinishReason(Enum):
class Logprobs (line 168) | class Logprobs(BaseModel):
class Choice (line 175) | class Choice(BaseModel):
class Object1 (line 185) | class Object1(Enum):
class Type (line 189) | class Type(Enum):
class Detail (line 193) | class Detail(Enum):
class ImageUrl (line 199) | class ImageUrl(BaseModel):
class ChatCompletionRequestMessageContentPartImage (line 211) | class ChatCompletionRequestMessageContentPartImage(BaseModel):
class Type1 (line 218) | class Type1(Enum):
class ChatCompletionRequestMessageContentPartText (line 222) | class ChatCompletionRequestMessageContentPartText(BaseModel):
class Role (line 229) | class Role(Enum):
method __str__ (line 232) | def __str__(self):
class ChatCompletionRequestSystemMessage (line 236) | class ChatCompletionRequestSystemMessage(BaseModel):
class Role1 (line 249) | class Role1(Enum):
method __str__ (line 252) | def __str__(self):
class Role2 (line 256) | class Role2(Enum):
method __str__ (line 259) | def __str__(self):
class FunctionCall (line 263) | class FunctionCall(BaseModel):
class Role3 (line 271) | class Role3(Enum):
method __str__ (line 274) | def __str__(self):
class ChatCompletionRequestToolMessage (line 278) | class ChatCompletionRequestToolMessage(BaseModel):
class Role4 (line 290) | class Role4(Enum):
method __str__ (line 293) | def __str__(self):
class ChatCompletionRequestFunctionMessage (line 297) | class ChatCompletionRequestFunctionMessage(BaseModel):
class FunctionParameters (line 307) | class FunctionParameters(BaseModel):
class ChatCompletionFunctions (line 311) | class ChatCompletionFunctions(BaseModel):
class ChatCompletionFunctionCallOption (line 323) | class ChatCompletionFunctionCallOption(BaseModel):
class FunctionObject (line 327) | class FunctionObject(BaseModel):
class ChatCompletionToolChoiceOption1 (line 339) | class ChatCompletionToolChoiceOption1(Enum):
class Function (line 345) | class Function(BaseModel):
class ChatCompletionNamedToolChoice (line 349) | class ChatCompletionNamedToolChoice(BaseModel):
class Function1 (line 357) | class Function1(BaseModel):
class ChatCompletionMessageToolCall (line 365) | class ChatCompletionMessageToolCall(BaseModel):
class Function2 (line 374) | class Function2(BaseModel):
class ChatCompletionMessageToolCallChunk (line 382) | class ChatCompletionMessageToolCallChunk(BaseModel):
class ChatCompletionRole (line 392) | class ChatCompletionRole(Enum):
class Role5 (line 400) | class Role5(Enum):
method __str__ (line 403) | def __str__(self):
class FunctionCall2 (line 407) | class FunctionCall2(BaseModel):
class Role6 (line 415) | class Role6(Enum):
method __str__ (line 421) | def __str__(self):
class ChatCompletionStreamResponseDelta (line 425) | class ChatCompletionStreamResponseDelta(BaseModel):
class Model2 (line 439) | class Model2(Enum):
class Type6 (line 461) | class Type6(Enum):
class ResponseFormat (line 466) | class ResponseFormat(BaseModel):
class StreamOptions (line 474) | class StreamOptions(BaseModel):
class FunctionCall3 (line 481) | class FunctionCall3(Enum):
class ChatCompletionFinishReason (line 486) | class ChatCompletionFinishReason(Enum):
class Object2 (line 494) | class Object2(Enum):
class FinishReason2 (line 498) | class FinishReason2(Enum):
class TopLogprob (line 505) | class TopLogprob(BaseModel):
class ChatCompletionTokenLogprob (line 517) | class ChatCompletionTokenLogprob(BaseModel):
class ChatCompletionLogprobs (line 533) | class ChatCompletionLogprobs(BaseModel):
class ChatCompletionStreamingResponseChoice (line 540) | class ChatCompletionStreamingResponseChoice(BaseModel):
class Object4 (line 554) | class Object4(Enum):
class CreateChatCompletionStreamResponse (line 558) | class CreateChatCompletionStreamResponse(BaseModel):
class CreateChatCompletionImageResponse (line 582) | class CreateChatCompletionImageResponse(BaseModel):
class Object5 (line 586) | class Object5(Enum):
class Model (line 590) | class Model(BaseModel):
class BaseUsage (line 604) | class BaseUsage(BaseModel):
class EmbeddingUsage (line 612) | class EmbeddingUsage(BaseUsage):
class CompletionUsage (line 616) | class CompletionUsage(BaseUsage):
class Event (line 622) | class Event(Enum):
class ErrorEvent (line 626) | class ErrorEvent(BaseModel):
class Event1 (line 631) | class Event1(Enum):
class Data (line 635) | class Data(Enum):
class DoneEvent (line 639) | class DoneEvent(BaseModel):
class ListModelsResponse (line 644) | class ListModelsResponse(BaseModel):
class CreateCompletionResponse (line 649) | class CreateCompletionResponse(BaseModel):
class ChatCompletionRequestMessageContentPart (line 670) | class ChatCompletionRequestMessageContentPart(RootModel):
class ChatCompletionRequestUserMessage (line 677) | class ChatCompletionRequestUserMessage(BaseModel):
class ChatCompletionTool (line 692) | class ChatCompletionTool(BaseModel):
class ChatCompletionToolChoiceOption (line 700) | class ChatCompletionToolChoiceOption(RootModel):
class ChatCompletionMessageToolCalls (line 707) | class ChatCompletionMessageToolCalls(RootModel):
class ChatCompletionResponseMessage (line 714) | class ChatCompletionResponseMessage(BaseModel):
class ChatCompletionChoice (line 724) | class ChatCompletionChoice(BaseModel):
class CreateChatCompletionResponse (line 738) | class CreateChatCompletionResponse(BaseModel):
class Choice2 (line 759) | class Choice2(BaseModel):
class CreateChatCompletionFunctionResponse (line 770) | class CreateChatCompletionFunctionResponse(BaseModel):
class ChatCompletionRequestAssistantMessage (line 791) | class ChatCompletionRequestAssistantMessage(BaseModel):
class ChatCompletionRequestMessage (line 812) | class ChatCompletionRequestMessage(RootModel):
method role (line 822) | def role(self):
method content (line 826) | def content(self):
class CreateChatCompletionRequest (line 830) | class CreateChatCompletionRequest(BaseModel):
class ObjectType (line 944) | class ObjectType:
class EmbeddingObject (line 952) | class EmbeddingObject(BaseModel):
class CreateEmbeddingRequest (line 968) | class CreateEmbeddingRequest(BaseModel):
class CreateEmbeddingResponse (line 998) | class CreateEmbeddingResponse(BaseModel):
FILE: python/openai/openai_frontend/utils/utils.py
class ServerError (line 30) | class ServerError(Exception):
class ClientError (line 36) | class ClientError(Exception):
class StatusCode (line 42) | class StatusCode(IntEnum):
FILE: python/openai/tests/conftest.py
function pytest_configure (line 35) | def pytest_configure(config):
function infer_test_environment (line 44) | def infer_test_environment(tool_call_parser):
function infer_test_model_repository (line 70) | def infer_test_model_repository(backend, tool_call_parser):
function tool_call_parser (line 82) | def tool_call_parser():
function backend (line 87) | def backend(tool_call_parser):
function model (line 98) | def model(tool_call_parser):
function model_repository (line 108) | def model_repository(backend, tool_call_parser):
function tokenizer_model (line 117) | def tokenizer_model():
function prompt (line 122) | def prompt():
function messages (line 127) | def messages(prompt):
function input (line 132) | def input(prompt):
function server (line 140) | def server(
function fastapi_client_class_scope (line 167) | def fastapi_client_class_scope(
function pytest_collection_modifyitems (line 187) | def pytest_collection_modifyitems(session, config, items):
FILE: python/openai/tests/test_chat_completions.py
class TestChatCompletions (line 38) | class TestChatCompletions:
method client (line 40) | def client(self, fastapi_client_class_scope):
method test_chat_completions_defaults (line 43) | def test_chat_completions_defaults(self, client, model: str, messages:...
method test_chat_completions_system_prompt (line 57) | def test_chat_completions_system_prompt(self, client, model: str):
method test_chat_completions_system_prompt_only (line 75) | def test_chat_completions_system_prompt_only(self, client, model: str):
method test_chat_completions_user_prompt_str (line 90) | def test_chat_completions_user_prompt_str(self, client, model: str):
method test_chat_completions_user_prompt_dict (line 103) | def test_chat_completions_user_prompt_dict(self, client, model: str):
method test_chat_completions_sampling_parameters (line 144) | def test_chat_completions_sampling_parameters(
method test_chat_completions_invalid_sampling_parameters (line 197) | def test_chat_completions_invalid_sampling_parameters(
method test_chat_completions_max_tokens (line 221) | def test_chat_completions_max_tokens(
method test_chat_completions_max_completion_tokens_precedence (line 267) | def test_chat_completions_max_completion_tokens_precedence(
method test_chat_completions_temperature_vllm (line 296) | def test_chat_completions_temperature_vllm(
method test_chat_completions_temperature_tensorrtllm (line 354) | def test_chat_completions_temperature_tensorrtllm(
method test_chat_completions_seed (line 416) | def test_chat_completions_seed(self, client, model: str, messages: Lis...
method test_chat_completions_no_message (line 466) | def test_chat_completions_no_message(
method test_chat_completions_empty_message (line 480) | def test_chat_completions_empty_message(
method test_chat_completions_multiple_choices (line 491) | def test_chat_completions_multiple_choices(
method test_chat_completions_streaming (line 503) | def test_chat_completions_streaming(self, client):
method test_chat_completions_no_streaming (line 506) | def test_chat_completions_no_streaming(
method test_function_calling (line 520) | def test_function_calling(self):
method test_lora (line 524) | def test_lora(self):
method test_multi_lora (line 528) | def test_multi_lora(self):
method test_request_n_choices (line 532) | def test_request_n_choices(self):
method test_request_logit_bias (line 536) | def test_request_logit_bias(self):
method test_usage_response (line 539) | def test_usage_response(self, client, model: str, messages: List[dict]):
method test_chat_completions_logprobs (line 557) | def test_chat_completions_logprobs(
method test_chat_completions_logprobs_false (line 613) | def test_chat_completions_logprobs_false(
method test_chat_completions_top_logprobs_without_logprobs (line 635) | def test_chat_completions_top_logprobs_without_logprobs(
method test_chat_completions_top_logprobs_validation (line 666) | def test_chat_completions_top_logprobs_validation(
class TestChatCompletionsTokenizers (line 690) | class TestChatCompletionsTokenizers:
method server (line 693) | def server(self, model_repository: str):
method test_chat_completions_no_tokenizer (line 701) | def test_chat_completions_no_tokenizer(
method test_chat_completions_custom_tokenizer (line 718) | def test_chat_completions_custom_tokenizer(
method test_chat_completions_invalid_chat_tokenizer (line 769) | def test_chat_completions_invalid_chat_tokenizer(
FILE: python/openai/tests/test_completions.py
class TestCompletions (line 32) | class TestCompletions:
method client (line 34) | def client(self, fastapi_client_class_scope):
method test_completions_defaults (line 37) | def test_completions_defaults(self, client, model: str, prompt: str):
method test_completions_sampling_parameters (line 69) | def test_completions_sampling_parameters(
method test_completions_max_tokens (line 106) | def test_completions_max_tokens(self, client, model: str, prompt: str):
method test_completions_temperature_vllm (line 149) | def test_completions_temperature_vllm(
method test_completions_temperature_tensorrtllm (line 204) | def test_completions_temperature_tensorrtllm(
method test_completions_seed (line 255) | def test_completions_seed(self, client, model: str, prompt: str):
method test_completions_invalid_sampling_parameters (line 309) | def test_completions_invalid_sampling_parameters(
method test_completions_empty_request (line 324) | def test_completions_empty_request(self, client):
method test_completions_no_model (line 328) | def test_completions_no_model(self, client, prompt: str):
method test_completions_no_prompt (line 332) | def test_completions_no_prompt(self, client, model: str):
method test_completions_empty_prompt (line 336) | def test_completions_empty_prompt(self, client, model: str):
method test_no_prompt (line 343) | def test_no_prompt(self, client, model: str):
method test_completions_multiple_choices (line 360) | def test_completions_multiple_choices(
method test_lora (line 384) | def test_lora(self):
method test_multi_lora (line 388) | def test_multi_lora(self):
method test_echo (line 392) | def test_echo(self, client, model: str, prompt: str, echo: bool):
method test_usage_response (line 404) | def test_usage_response(self, client, model: str, prompt: str):
method test_completions_logprobs (line 422) | def test_completions_logprobs(self, client, backend: str, model: str, ...
method test_completions_logprobs_zero (line 480) | def test_completions_logprobs_zero(self, client, model: str, prompt: s...
method test_completions_logprobs_validation (line 499) | def test_completions_logprobs_validation(self, client, model: str, pro...
FILE: python/openai/tests/test_embeddings.py
class TestEmbeddings (line 427) | class TestEmbeddings:
method client (line 429) | def client(self, fastapi_client_class_scope):
method model (line 433) | def model(self):
method tokenizer_model (line 438) | def tokenizer_model(self):
method model_repository (line 442) | def model_repository(self):
method input (line 447) | def input(self):
method _check_embedding_response (line 450) | def _check_embedding_response(
method test_embeddings_defaults (line 483) | def test_embeddings_defaults(self, client, model: str, input: str):
method test_embeddings_invalid_parameters (line 511) | def test_embeddings_invalid_parameters(
method test_embeddings_parameters (line 528) | def test_embeddings_parameters(
method test_embeddings_empty_request (line 545) | def test_embeddings_empty_request(self, client):
method test_embeddings_no_model (line 550) | def test_embeddings_no_model(self, client, input: str):
method test_embeddings_invalid_model (line 564) | def test_embeddings_invalid_model(self, client, model: str, input, err...
method test_embeddings_no_input (line 576) | def test_embeddings_no_input(self, client, model: str):
method test_embeddings_empty_input (line 587) | def test_embeddings_empty_input(self, client, model: str, input):
method test_embeddings_invalid_input (line 605) | def test_embeddings_invalid_input(self, client, model: str, input):
FILE: python/openai/tests/test_lora.py
function is_vllm_installed (line 45) | def is_vllm_installed():
function test_parse_lora_configs (line 68) | def test_parse_lora_configs(model_repository: str, model_name: str, expe...
function test_validate_lora_path_trtllm (line 125) | def test_validate_lora_path_trtllm(
class LoRATest (line 147) | class LoRATest(unittest.TestCase):
method setUp (line 161) | def setUp(self):
method _create_vllm_model_repository_with_lora (line 165) | def _create_vllm_model_repository_with_lora(self):
method _create_trtllm_model_repository_with_lora (line 198) | def _create_trtllm_model_repository_with_lora(self):
method _create_vllm_model_repository_without_lora (line 211) | def _create_vllm_model_repository_without_lora(self):
method _create_trtllm_model_repository_without_lora (line 219) | def _create_trtllm_model_repository_without_lora(self):
method _create_model_repository_mock_llm (line 223) | def _create_model_repository_mock_llm(self):
method _get_model_name (line 285) | def _get_model_name(self, lora_name):
method _test_list_models (line 291) | def _test_list_models(self, client, expected_lora_names):
method _test_retrieve_model (line 309) | def _test_retrieve_model(self, client, lora_name):
method _test_completions (line 314) | def _test_completions(self, client, lora_name):
method _test_chat_completion (line 332) | def _test_chat_completion(self, client, lora_name):
method test_lora_separator_not_set (line 351) | def test_lora_separator_not_set(self):
method test_lora_separator_set (line 392) | def test_lora_separator_set(self):
method test_lora_separator_set_for_lora_off_model (line 441) | def test_lora_separator_set_for_lora_off_model(self):
method test_lora_separator_set_for_non_vllm_formatted_models (line 482) | def test_lora_separator_set_for_non_vllm_formatted_models(self):
FILE: python/openai/tests/test_models/identity_py/1/model.py
class TritonPythonModel (line 30) | class TritonPythonModel:
method execute (line 31) | def execute(self, requests):
FILE: python/openai/tests/test_models/mock_llm/1/model.py
class TritonPythonModel (line 34) | class TritonPythonModel:
method initialize (line 35) | def initialize(self, args):
method execute (line 41) | def execute(self, requests):
method exec (line 47) | def exec(self, requests):
method exec_decoupled (line 73) | def exec_decoupled(self, requests):
FILE: python/openai/tests/test_observability.py
function model (line 36) | def model():
class TestObservability (line 40) | class TestObservability:
method client (line 42) | def client(self):
method test_not_found (line 53) | def test_not_found(self, client):
method test_startup_success (line 58) | def test_startup_success(self, client):
method test_startup_metrics (line 63) | def test_startup_metrics(self, client):
method test_models_list (line 70) | def test_models_list(self, client):
method test_models_get (line 83) | def test_models_get(self, client, model):
FILE: python/openai/tests/test_openai_client.py
class TestOpenAIClient (line 35) | class TestOpenAIClient:
method client (line 37) | def client(self, server):
method test_openai_client_models (line 40) | def test_openai_client_models(self, client: openai.OpenAI, backend: str):
method test_openai_client_completion (line 52) | def test_openai_client_completion(
method test_openai_client_chat_completion (line 73) | def test_openai_client_chat_completion(
method test_openai_client_completion_echo (line 95) | def test_openai_client_completion_echo(
method test_openai_client_function_calling (line 108) | def test_openai_client_function_calling(self):
class TestAsyncOpenAIClient (line 113) | class TestAsyncOpenAIClient:
method client (line 115) | def client(self, server):
method test_openai_client_models (line 119) | async def test_openai_client_models(self, client: openai.AsyncOpenAI, ...
method test_openai_client_completion (line 133) | async def test_openai_client_completion(
method test_openai_client_chat_completion (line 155) | async def test_openai_client_chat_completion(
method test_completion_streaming (line 178) | async def test_completion_streaming(
method test_chat_streaming (line 227) | async def test_chat_streaming(
method test_chat_streaming_usage_option (line 286) | async def test_chat_streaming_usage_option(
method test_completion_streaming_usage_option (line 378) | async def test_completion_streaming_usage_option(
method test_stream_options_without_streaming (line 466) | async def test_stream_options_without_streaming(
method test_chat_completion_logprobs (line 488) | async def test_chat_completion_logprobs(
method test_completion_logprobs (line 576) | async def test_completion_logprobs(
method test_top_logprobs_requires_logprobs (line 666) | async def test_top_logprobs_requires_logprobs(
method test_chat_top_logprobs_exceeds_max (line 694) | async def test_chat_top_logprobs_exceeds_max(
method test_completion_logprobs_exceeds_max (line 710) | async def test_completion_logprobs_exceeds_max(
FILE: python/openai/tests/test_openai_restricted_apis.py
function assert_response_success (line 37) | def assert_response_success(
function assert_response_unauthorized (line 46) | def assert_response_unauthorized(
function make_get_request (line 55) | def make_get_request(
function verify_inference_endpoints (line 67) | def verify_inference_endpoints(
function verify_model_repository_endpoints (line 129) | def verify_model_repository_endpoints(
function verify_metrics_endpoint (line 153) | def verify_metrics_endpoint(base_url, headers, expected_success, descrip...
function verify_health_endpoint (line 168) | def verify_health_endpoint(base_url, headers, expected_success, descript...
class TestRestrictedAPIInvalidArguments (line 182) | class TestRestrictedAPIInvalidArguments:
method _test_server_startup_failure (line 185) | def _test_server_startup_failure(
method test_unknown_endpoint_names (line 223) | def test_unknown_endpoint_names(self, malformed_arg):
method test_duplicate_apis (line 236) | def test_duplicate_apis(self, malformed_arg):
method test_conflict_configs (line 268) | def test_conflict_configs(self, malformed_arg):
class TestOpenAIServerRestrictedAPIs (line 278) | class TestOpenAIServerRestrictedAPIs:
method server_with_restrictions (line 282) | def server_with_restrictions(self, model_repository, tokenizer_model, ...
method test_restricted_endpoints_with_auth (line 309) | def test_restricted_endpoints_with_auth(
method test_unrestricted_endpoints (line 322) | def test_unrestricted_endpoints(self, server_with_restrictions):
class TestOpenAIServerMultipleRestrictions (line 335) | class TestOpenAIServerMultipleRestrictions:
method server_multiple_restrictions (line 339) | def server_multiple_restrictions(self, model_repository, tokenizer_mod...
method test_endpoint_groups_with_correct_auth (line 361) | def test_endpoint_groups_with_correct_auth(
method test_endpoint_groups_with_wrong_auth (line 405) | def test_endpoint_groups_with_wrong_auth(
method test_unrestricted_endpoints (line 433) | def test_unrestricted_endpoints(self, server_multiple_restrictions):
FILE: python/openai/tests/test_tool_calling.py
class TestAsyncClientToolCalling (line 142) | class TestAsyncClientToolCalling:
method client (line 144) | def client(self, server):
method validate_tool_calls_present (line 147) | def validate_tool_calls_present(
method validate_weather_tool_arguments (line 158) | def validate_weather_tool_arguments(self, parsed_arguments: Dict):
method validate_weather_forcast_tool_arguments (line 167) | def validate_weather_forcast_tool_arguments(self, parsed_arguments: Di...
method test_tool_call_and_choice (line 178) | async def test_tool_call_and_choice(self, client: openai.AsyncOpenAI, ...
method test_tool_call_with_reply_response (line 285) | async def test_tool_call_with_reply_response(
method test_tool_call_with_named_tool_choice (line 345) | async def test_tool_call_with_named_tool_choice(
method test_tool_call_with_required_tool_choice (line 448) | async def test_tool_call_with_required_tool_choice(
method test_inconsistent_tool_choice_and_tools (line 551) | async def test_inconsistent_tool_choice_and_tools(
FILE: python/openai/tests/utils.py
function setup_server (line 45) | def setup_server(model_repository: str):
function setup_fastapi_app (line 56) | def setup_fastapi_app(
class OpenAIServer (line 73) | class OpenAIServer:
method __init__ (line 77) | def __init__(
method _read_stderr (line 107) | def _read_stderr(self):
method __enter__ (line 120) | def __enter__(self):
method __exit__ (line 123) | def __exit__(self, exc_type, exc_value, traceback):
method _wait_for_server (line 132) | def _wait_for_server(self, *, url: str, timeout: float):
method url_root (line 168) | def url_root(self) -> str:
method url_for (line 171) | def url_for(self, *parts: str) -> str:
method get_client (line 174) | def get_client(self):
method get_async_client (line 180) | def get_async_client(self):
FILE: qa/L0_backend_onnxruntime/gen_add_bf16_onnx_model.py
function generate_bf16_add_model (line 36) | def generate_bf16_add_model(models_dir):
FILE: qa/L0_backend_onnxruntime/test.py
class BFloat16Test (line 36) | class BFloat16Test(unittest.TestCase):
method setUp (line 37) | def setUp(self):
method _infer_bf16 (line 46) | def _infer_bf16(self, input0_data, input1_data):
method test_bf16_add_variants (line 60) | def test_bf16_add_variants(self):
FILE: qa/L0_backend_python/argument_validation/models/argument_validation/1/model.py
class ArgumentValidationTest (line 33) | class ArgumentValidationTest(unittest.TestCase):
method test_infer_request_args (line 34) | def test_infer_request_args(self):
method test_infer_response_args (line 155) | def test_infer_response_args(self):
method test_tensor_args (line 170) | def test_tensor_args(self):
method test_log_args (line 193) | def test_log_args(self):
class TritonPythonModel (line 224) | class TritonPythonModel:
method execute (line 228) | def execute(self, requests):
FILE: qa/L0_backend_python/async_execute/concurrency_test.py
class ConcurrencyTest (line 35) | class ConcurrencyTest(unittest.TestCase):
method setUp (line 36) | def setUp(self):
method _generate_streaming_callback_and_response_pair (line 40) | def _generate_streaming_callback_and_response_pair(self):
method _concurrent_execute_requests (line 49) | def _concurrent_execute_requests(self, model_name, batch_size, number_...
method test_concurrent_execute_single_request (line 77) | def test_concurrent_execute_single_request(self):
method test_concurrent_execute_multi_request (line 83) | def test_concurrent_execute_multi_request(self):
method test_concurrent_execute_single_request_bls (line 89) | def test_concurrent_execute_single_request_bls(self):
method test_concurrent_execute_multi_request_bls (line 95) | def test_concurrent_execute_multi_request_bls(self):
method test_concurrent_execute_different_duration (line 101) | def test_concurrent_execute_different_duration(self):
method test_model_raise_exception (line 138) | def test_model_raise_exception(self):
FILE: qa/L0_backend_python/bls/bls_parameters_test.py
class TestBlsParameters (line 42) | class TestBlsParameters(unittest.TestCase):
method test_bls_parameters (line 43) | def test_bls_parameters(self):
FILE: qa/L0_backend_python/decoupled/decoupled_test.py
function prepare_decoupled_bls_cancel_inputs (line 49) | def prepare_decoupled_bls_cancel_inputs(input_value, max_sum_value, igno...
class UserData (line 77) | class UserData:
method __init__ (line 78) | def __init__(self):
function callback (line 82) | def callback(user_data, result, error):
class DecoupledTest (line 89) | class DecoupledTest(unittest.TestCase):
method setUp (line 90) | def setUp(self):
method test_decoupled_execute_error (line 93) | def test_decoupled_execute_error(self):
method test_decoupled_bls (line 139) | def test_decoupled_bls(self):
method test_decoupled_bls_stream (line 178) | def test_decoupled_bls_stream(self):
method test_decoupled_return_response_error (line 246) | def test_decoupled_return_response_error(self):
method test_decoupled_send_after_close_error (line 282) | def test_decoupled_send_after_close_error(self):
method test_decoupled_execute_cancel (line 319) | def test_decoupled_execute_cancel(self):
method test_decoupled_bls_cancel (line 355) | def test_decoupled_bls_cancel(self):
method test_decoupled_bls_ignore_cancel (line 397) | def test_decoupled_bls_ignore_cancel(self):
method test_decoupled_bls_cancel_after_cancellation (line 439) | def test_decoupled_bls_cancel_after_cancellation(self):
method test_decoupled_bls_cancel_after_completion (line 479) | def test_decoupled_bls_cancel_after_completion(self):
method test_decoupled_raise_exception (line 520) | def test_decoupled_raise_exception(self):
FILE: qa/L0_backend_python/decoupled/models/decoupled_bls/1/model.py
class TritonPythonModel (line 38) | class TritonPythonModel:
method initialize (line 41) | def initialize(self, args):
method execute (line 76) | def execute(self, requests):
method _get_gpu_bls_outputs (line 125) | def _get_gpu_bls_outputs(self, input0_pb, input1_pb):
method _test_gpu_bls_add_sub (line 191) | def _test_gpu_bls_add_sub(self, is_input0_gpu, is_input1_gpu):
method execute_gpu_bls (line 233) | def execute_gpu_bls(self):
method response_thread (line 247) | def response_thread(self, response_sender, in_input):
method finalize (line 306) | def finalize(self):
FILE: qa/L0_backend_python/decoupled/models/decoupled_bls_async_cancel/1/model.py
class TritonPythonModel (line 32) | class TritonPythonModel:
method execute (line 43) | async def execute(self, requests):
FILE: qa/L0_backend_python/decoupled/models/decoupled_bls_cancel/1/model.py
class TritonPythonModel (line 31) | class TritonPythonModel:
method execute (line 42) | def execute(self, requests):
FILE: qa/L0_backend_python/decoupled/models/decoupled_bls_cancel_after_complete/1/model.py
class TritonPythonModel (line 32) | class TritonPythonModel:
method execute (line 43) | async def execute(self, requests):
FILE: qa/L0_backend_python/decoupled/models/decoupled_bls_stream/1/model.py
class TritonPythonModel (line 35) | class TritonPythonModel:
method initialize (line 40) | def initialize(self, args):
method execute (line 59) | def execute(self, requests):
method response_thread (line 77) | def response_thread(self, response_sender, in_value):
method finalize (line 126) | def finalize(self):
FILE: qa/L0_backend_python/decoupled/models/decoupled_execute_error/1/model.py
class TritonPythonModel (line 34) | class TritonPythonModel:
method initialize (line 37) | def initialize(self, args):
method execute (line 62) | def execute(self, requests):
method response_thread (line 85) | def response_thread(self, response_sender, index, in_input):
method finalize (line 111) | def finalize(self):
FILE: qa/L0_backend_python/decoupled/models/decoupled_raise_exception/1/model.py
class TritonPythonModel (line 28) | class TritonPythonModel:
method initialize (line 29) | def initialize(self, args):
method execute (line 32) | def execute(self, requests):
FILE: qa/L0_backend_python/decoupled/models/decoupled_return_response_error/1/model.py
class TritonPythonModel (line 32) | class TritonPythonModel:
method initialize (line 37) | def initialize(self, args):
method execute (line 62) | def execute(self, requests):
FILE: qa/L0_backend_python/decoupled/models/decoupled_send_after_close_error/1/model.py
class TritonPythonModel (line 32) | class TritonPythonModel:
method initialize (line 37) | def initialize(self, args):
method execute (line 62) | def execute(self, requests):
FILE: qa/L0_backend_python/ensemble/ensemble_test.py
class EnsembleTest (line 46) | class EnsembleTest(unittest.TestCase):
method setUp (line 47) | def setUp(self):
method infer (line 50) | def infer(self, model_name):
method test_ensemble (line 83) | def test_ensemble(self):
method test_ensemble_gpu (line 87) | def test_ensemble_gpu(self):
FILE: qa/L0_backend_python/io/io_test.py
class UserData (line 51) | class UserData:
method __init__ (line 52) | def __init__(self):
function callback (line 56) | def callback(user_data, result, error):
class IOTest (line 63) | class IOTest(unittest.TestCase):
method setUp (line 64) | def setUp(self):
method _run_ensemble_test (line 68) | def _run_ensemble_test(self, model_name):
method test_ensemble_io (line 109) | def test_ensemble_io(self):
method test_empty_gpu_output (line 118) | def test_empty_gpu_output(self):
method test_variable_gpu_output (line 133) | def test_variable_gpu_output(self):
method test_requested_output_default (line 166) | def test_requested_output_default(self):
method test_requested_output_decoupled (line 207) | def test_requested_output_decoupled(self):
method test_requested_output_decoupled_prior_crash (line 263) | def test_requested_output_decoupled_prior_crash(self):
FILE: qa/L0_backend_python/io/requested_output_model/model.py
function get_valid_param_value (line 38) | def get_valid_param_value(param, default_value=""):
class TritonPythonModel (line 43) | class TritonPythonModel:
method initialize (line 44) | def initialize(self, args):
method create_triton_tensors (line 55) | def create_triton_tensors(self, index):
method create_triton_response (line 66) | def create_triton_response(self, index):
method execute (line 70) | def execute(self, requests):
method finalize (line 109) | def finalize(self):
FILE: qa/L0_backend_python/lifecycle/lifecycle_test.py
class UserData (line 54) | class UserData:
method __init__ (line 55) | def __init__(self):
function callback (line 59) | def callback(user_data, result, error):
class LifecycleTest (line 66) | class LifecycleTest(unittest.TestCase):
method setUp (line 67) | def setUp(self):
method _get_metrics (line 70) | def _get_metrics(self):
method _metrics_before_test (line 76) | def _metrics_before_test(self, model, reason):
method _assert_metrics (line 85) | def _assert_metrics(
method test_error_code (line 93) | def test_error_code(self):
method test_execute_cancel (line 129) | def test_execute_cancel(self):
method test_batch_error (line 167) | def test_batch_error(self):
method test_infer_pymodel_error (line 208) | def test_infer_pymodel_error(self):
method test_triton_grpc_error_error_on (line 247) | def test_triton_grpc_error_error_on(self):
method test_triton_grpc_error_multithreaded (line 291) | def test_triton_grpc_error_multithreaded(self):
method test_triton_grpc_error_cancel (line 303) | def test_triton_grpc_error_cancel(self):
method test_triton_grpc_error_error_off (line 353) | def test_triton_grpc_error_error_off(self):
FILE: qa/L0_backend_python/logging/logging_test.py
class LogTest (line 45) | class LogTest(unittest.TestCase):
method setUp (line 46) | def setUp(self):
method test_log_output (line 49) | def test_log_output(self):
FILE: qa/L0_backend_python/model_control/model_control_test.py
class ExplicitModelTest (line 49) | class ExplicitModelTest(unittest.TestCase):
method setUp (line 50) | def setUp(self):
method send_identity_request (line 53) | def send_identity_request(self, client, model_name):
method test_model_reload (line 69) | def test_model_reload(self):
class ModelIDValidationTest (line 89) | class ModelIDValidationTest(unittest.TestCase):
method setUp (line 98) | def setUp(self):
method _send_load_model_request (line 110) | def _send_load_model_request(self, model_name):
method test_invalid_character_model_names (line 214) | def test_invalid_character_model_names(self):
method test_valid_model_names (line 281) | def test_valid_model_names(self):
FILE: qa/L0_backend_python/model_readiness/test_model_readiness.py
class UserData (line 43) | class UserData:
method __init__ (line 44) | def __init__(self):
function callback (line 48) | def callback(user_data, result, error):
function prepare_infer_args (line 55) | def prepare_infer_args(input_value):
function collect_responses (line 64) | def collect_responses(user_data, expected_responses_count):
function call_inference_identity_model (line 88) | def call_inference_identity_model(model_name, protocol, client):
class TestModelReadiness (line 109) | class TestModelReadiness(unittest.TestCase):
method setUp (line 110) | def setUp(self):
method test_model_ready (line 115) | def test_model_ready(self):
method test_model_not_ready (line 136) | def test_model_not_ready(self):
class TestUserDefinedModelReadinessFunction (line 158) | class TestUserDefinedModelReadinessFunction(unittest.TestCase):
method setUp (line 163) | def setUp(self):
method _run_inference_decoupled (line 167) | def _run_inference_decoupled(self, index, model_name, expected_respons...
method test_multiple_concurrent_ready_and_infer_requests_decoupled (line 203) | def test_multiple_concurrent_ready_and_infer_requests_decoupled(self):
method test_is_ready_coroutine_returns_true (line 254) | def test_is_ready_coroutine_returns_true(self):
method test_is_ready_returns_true (line 268) | def test_is_ready_returns_true(self):
method test_is_ready_returns_false (line 287) | def test_is_ready_returns_false(self):
method test_is_ready_raises_exception (line 306) | def test_is_ready_raises_exception(self):
method test_is_ready_returns_non_boolean (line 341) | def test_is_ready_returns_non_boolean(self):
method test_is_ready_takes_long_time (line 376) | def test_is_ready_takes_long_time(self):
method test_multiple_concurrent_ready_and_infer_requests (line 405) | def test_multiple_concurrent_ready_and_infer_requests(self):
FILE: qa/L0_backend_python/model_readiness/test_models/is_ready_fn_returns_true_decoupled/model.py
class TritonPythonModel (line 34) | class TritonPythonModel:
method execute (line 39) | def execute(self, requests):
method is_ready (line 60) | def is_ready(self) -> bool:
FILE: qa/L0_backend_python/model_readiness/test_models/readiness_coroutine_model.py
class TritonPythonModel (line 33) | class TritonPythonModel:
method initialize (line 41) | def initialize(self, args):
method execute (line 48) | def execute(self, requests):
method is_ready (line 56) | async def is_ready(self):
FILE: qa/L0_backend_python/model_readiness/test_models/readiness_model.py
class TritonPythonModel (line 33) | class TritonPythonModel:
method initialize (line 42) | def initialize(self, args):
method execute (line 52) | def execute(self, requests):
method is_ready (line 60) | def is_ready(self):
FILE: qa/L0_backend_python/parameters/response_parameters_test.py
class ResponseParametersTest (line 40) | class ResponseParametersTest(unittest.TestCase):
method setUp (line 45) | def setUp(self):
method _assert_response_parameters_match (line 48) | def _assert_response_parameters_match(self, infer_result, expected_par...
method _assert_response_parameters_infer_success (line 62) | def _assert_response_parameters_infer_success(self, params):
method _assert_response_parameters_infer_fail (line 79) | def _assert_response_parameters_infer_fail(self, params, expected_err_...
method test_setting_empty_response_parameters (line 93) | def test_setting_empty_response_parameters(self):
method test_setting_one_element_response_parameters (line 97) | def test_setting_one_element_response_parameters(self):
method test_setting_three_element_response_parameters (line 101) | def test_setting_three_element_response_parameters(self):
method test_setting_multi_element_response_parameters (line 105) | def test_setting_multi_element_response_parameters(self):
method test_setting_wrong_type_response_parameters (line 109) | def test_setting_wrong_type_response_parameters(self):
method test_setting_int_key_type_response_parameters (line 114) | def test_setting_int_key_type_response_parameters(self):
method test_setting_float_response_parameters (line 121) | def test_setting_float_response_parameters(self):
method test_setting_null_response_parameters (line 126) | def test_setting_null_response_parameters(self):
method test_setting_nested_response_parameters (line 131) | def test_setting_nested_response_parameters(self):
method test_setting_response_parameters_decoupled (line 136) | def test_setting_response_parameters_decoupled(self):
method test_setting_response_parameters_bls (line 169) | def test_setting_response_parameters_bls(self):
FILE: qa/L0_backend_python/python_based_backends/python_based_backends_test.py
class PythonBasedBackendsTest (line 43) | class PythonBasedBackendsTest(unittest.TestCase):
method setUp (line 44) | def setUp(self):
method test_add_sub_models (line 61) | def test_add_sub_models(self):
method test_python_model (line 79) | def test_python_model(self):
method test_pytorch_model (line 87) | def test_pytorch_model(self):
method _test_add_sub_model (line 93) | def _test_add_sub_model(
method tearDown (line 145) | def tearDown(self):
FILE: qa/L0_backend_python/python_test.py
class PythonTest (line 50) | class PythonTest(unittest.TestCase):
method setUp (line 51) | def setUp(self):
method _infer_help (line 54) | def _infer_help(self, model_name, shape, data_type):
method _create_cuda_region (line 68) | def _create_cuda_region(self, client, size, name):
method _optional_input_infer (line 79) | def _optional_input_infer(self, model_name, has_input0, has_input1):
method test_growth_error (line 129) | def test_growth_error(self):
method test_gpu_tensor_error (line 172) | def test_gpu_tensor_error(self):
method test_dlpack_tensor_error (line 206) | def test_dlpack_tensor_error(self):
method test_async_infer (line 247) | def test_async_infer(self):
method test_bool (line 350) | def test_bool(self):
method test_bf16 (line 368) | def test_bf16(self):
method test_infer_pytorch (line 398) | def test_infer_pytorch(self):
method test_init_args (line 438) | def test_init_args(self):
method test_unicode (line 459) | def test_unicode(self):
method test_optional_input (line 485) | def test_optional_input(self):
method test_string (line 493) | def test_string(self):
method test_non_contiguous (line 521) | def test_non_contiguous(self):
FILE: qa/L0_backend_python/request_rescheduling/grpc_endpoint_test.py
class UserData (line 47) | class UserData:
method __init__ (line 48) | def __init__(self):
function callback (line 52) | def callback(user_data, result, error):
class GrpcEndpointTest (line 59) | class GrpcEndpointTest(unittest.TestCase):
method test_grpc_decoupled (line 60) | def test_grpc_decoupled(self, sequence_id=0, sequence_start=False):
method test_grpc_non_decoupled (line 90) | def test_grpc_non_decoupled(self, sequence_id=0, sequence_start=False):
FILE: qa/L0_backend_python/response_sender/response_sender_complete_final_test.py
class ResponseSenderTest (line 39) | class ResponseSenderTest(unittest.TestCase):
method _generate_streaming_callback_and_responses_pair (line 40) | def _generate_streaming_callback_and_responses_pair(self):
method test_respond_after_complete_final (line 48) | def test_respond_after_complete_final(self):
FILE: qa/L0_backend_python/response_sender/response_sender_test.py
class ResponseSenderTest (line 39) | class ResponseSenderTest(unittest.TestCase):
method _get_inputs (line 111) | def _get_inputs(
method _generate_streaming_callback_and_responses_pair (line 146) | def _generate_streaming_callback_and_responses_pair(self):
method _infer_parallel (line 154) | def _infer_parallel(self, model_name, parallel_inputs):
method _infer (line 163) | def _infer(
method _assert_responses_valid (line 181) | def _assert_responses_valid(
method _assert_responses_exception (line 212) | def _assert_responses_exception(self, responses, expected_message):
method _assert_decoupled_infer_success (line 220) | def _assert_decoupled_infer_success(
method _assert_non_decoupled_infer_with_expected_response_success (line 260) | def _assert_non_decoupled_infer_with_expected_response_success(
method _assert_non_decoupled_infer_success (line 303) | def _assert_non_decoupled_infer_success(
method test_decoupled_zero_response_pre_return (line 323) | def test_decoupled_zero_response_pre_return(self):
method test_decoupled_zero_response_post_return (line 329) | def test_decoupled_zero_response_post_return(self):
method test_decoupled_one_response_pre_return (line 335) | def test_decoupled_one_response_pre_return(self):
method test_decoupled_one_response_post_return (line 341) | def test_decoupled_one_response_post_return(self):
method test_decoupled_two_response_pre_return (line 347) | def test_decoupled_two_response_pre_return(self):
method test_decoupled_two_response_post_return (line 353) | def test_decoupled_two_response_post_return(self):
method test_decoupled_response_pre_and_post_return (line 359) | def test_decoupled_response_pre_and_post_return(self):
method test_non_decoupled_one_response_on_return (line 365) | def test_non_decoupled_one_response_on_return(self):
method test_non_decoupled_one_response_pre_return (line 371) | def test_non_decoupled_one_response_pre_return(self):
method test_non_decoupled_one_response_post_return (line 377) | def test_non_decoupled_one_response_post_return(self):
method test_decoupled_multiple_requests (line 383) | def test_decoupled_multiple_requests(self):
method test_non_decoupled_multiple_requests (line 416) | def test_non_decoupled_multiple_requests(self):
method test_decoupled_one_response_on_return (line 445) | def test_decoupled_one_response_on_return(self):
method test_decoupled_one_response_pre_and_on_return (line 458) | def test_decoupled_one_response_pre_and_on_return(self):
method test_decoupled_one_response_on_and_post_return (line 476) | def test_decoupled_one_response_on_and_post_return(self):
method test_non_decoupled_zero_response_pre_return (line 494) | def test_non_decoupled_zero_response_pre_return(self):
method test_non_decoupled_zero_response_post_return (line 518) | def test_non_decoupled_zero_response_post_return(self):
method test_non_decoupled_two_response_pre_return (line 528) | def test_non_decoupled_two_response_pre_return(self):
method test_non_decoupled_two_response_post_return (line 544) | def test_non_decoupled_two_response_post_return(self):
method test_non_decoupled_one_response_pre_and_on_return (line 560) | def test_non_decoupled_one_response_pre_and_on_return(self):
method test_non_decoupled_one_response_on_and_post_return (line 573) | def test_non_decoupled_one_response_on_and_post_return(self):
FILE: qa/L0_backend_python/restart/models/restart/1/model.py
class TritonPythonModel (line 33) | class TritonPythonModel:
method execute (line 34) | def execute(self, requests):
FILE: qa/L0_backend_python/restart/restart_test.py
class RestartTest (line 46) | class RestartTest(unittest.TestCase):
method setUp (line 47) | def setUp(self):
method _infer_helper (line 50) | def _infer_helper(self, model_name, shape, data_type):
method test_restart (line 63) | def test_restart(self):
method test_infer (line 83) | def test_infer(self):
FILE: qa/L0_backend_python/test_infer_shm_leak.py
class TestInferShmLeak (line 51) | class TestInferShmLeak:
method _run_unittest (line 52) | def _run_unittest(self, model_name):
method test_shm_leak (line 62) | def test_shm_leak(self):
FILE: qa/L0_batch_custom/batch_custom_test.py
class BatcherTest (line 53) | class BatcherTest(tu.TestResultCollector):
method setUp (line 54) | def setUp(self):
method tearDown (line 63) | def tearDown(self):
method add_deferred_exception (line 66) | def add_deferred_exception(self, ex):
method check_deferred_exception (line 71) | def check_deferred_exception(self):
method check_response (line 77) | def check_response(
method check_status (line 142) | def check_status(self, model_name, batch_exec, request_cnt, infer_cnt,...
method test_volume_batching (line 229) | def test_volume_batching(self):
FILE: qa/L0_batch_input/batch_input_test.py
class BatchInputTest (line 43) | class BatchInputTest(tu.TestResultCollector):
method setUp (line 44) | def setUp(self):
method set_inputs (line 55) | def set_inputs(self, shapes, input_name):
method set_inputs_for_batch_item (line 66) | def set_inputs_for_batch_item(self, shapes, input_name):
method test_ragged_output (line 73) | def test_ragged_output(self):
method test_ragged_input (line 115) | def test_ragged_input(self):
method test_element_count (line 154) | def test_element_count(self):
method test_accumulated_element_count (line 191) | def test_accumulated_element_count(self):
method test_accumulated_element_count_with_zero (line 228) | def test_accumulated_element_count_with_zero(self):
method test_max_element_count_as_shape (line 265) | def test_max_element_count_as_shape(self):
method test_batch_item_shape_flatten (line 302) | def test_batch_item_shape_flatten(self):
method test_batch_item_shape (line 346) | def test_batch_item_shape(self):
FILE: qa/L0_batcher/batcher_test.py
class BatcherTest (line 82) | class BatcherTest(tu.TestResultCollector):
method setUp (line 83) | def setUp(self):
method tearDown (line 92) | def tearDown(self):
method create_advance (line 105) | def create_advance(self, shm_regions=None):
method add_deferred_exception (line 131) | def add_deferred_exception(self, ex):
method check_deferred_exception (line 136) | def check_deferred_exception(self):
method check_response (line 142) | def check_response(
method check_setup (line 211) | def check_setup(self, model_name, preferred_batch_sizes, max_queue_del...
method check_status (line 220) | def check_status(self, model_name, batch_exec, request_cnt, infer_cnt,...
method test_static_batch_preferred (line 297) | def test_static_batch_preferred(self):
method test_static_batch_lt_any_preferred (line 328) | def test_static_batch_lt_any_preferred(self):
method test_static_batch_not_preferred (line 353) | def test_static_batch_not_preferred(self):
method test_static_batch_gt_max_preferred (line 378) | def test_static_batch_gt_max_preferred(self):
method test_multi_batch_different_shape_allow_ragged (line 403) | def test_multi_batch_different_shape_allow_ragged(self):
method test_multi_batch_different_shape (line 450) | def test_multi_batch_different_shape(self):
method test_multi_batch_not_preferred (line 511) | def test_multi_batch_not_preferred(self):
method test_multi_batch_not_preferred_different_shape (line 573) | def test_multi_batch_not_preferred_different_shape(self):
method test_multi_batch_preferred_different_shape (line 646) | def test_multi_batch_preferred_different_shape(self):
method test_multi_batch_gt_max_preferred (line 732) | def test_multi_batch_gt_max_preferred(self):
method test_multi_batch_sum_gt_max_preferred (line 786) | def test_multi_batch_sum_gt_max_preferred(self):
method test_multi_same_output0 (line 847) | def test_multi_same_output0(self):
method test_multi_same_output1 (line 901) | def test_multi_same_output1(self):
method test_multi_different_outputs (line 955) | def test_multi_different_outputs(self):
method test_multi_different_output_order (line 1010) | def test_multi_different_output_order(self):
method test_multi_batch_delayed_sum_gt_max_preferred (line 1060) | def test_multi_batch_delayed_sum_gt_max_preferred(self):
method test_multi_batch_delayed_use_max_batch (line 1124) | def test_multi_batch_delayed_use_max_batch(self):
method test_multi_batch_delayed_preferred_different_shape (line 1208) | def test_multi_batch_delayed_preferred_different_shape(self):
method test_multi_batch_use_biggest_preferred (line 1299) | def test_multi_batch_use_biggest_preferred(self):
method test_multi_batch_use_best_preferred (line 1407) | def test_multi_batch_use_best_preferred(self):
method test_multi_batch_preserve_ordering (line 1484) | def test_multi_batch_preserve_ordering(self):
method test_preferred_batch_only_aligned (line 1527) | def test_preferred_batch_only_aligned(self):
method test_preferred_batch_only_unaligned (line 1609) | def test_preferred_batch_only_unaligned(self):
method test_preferred_batch_only_use_biggest_preferred (line 1704) | def test_preferred_batch_only_use_biggest_preferred(self):
method test_preferred_batch_only_use_no_preferred_size (line 1825) | def test_preferred_batch_only_use_no_preferred_size(self):
method test_max_queue_delay_only_non_default (line 1893) | def test_max_queue_delay_only_non_default(self):
method test_max_queue_delay_only_default (line 1939) | def test_max_queue_delay_only_default(self):
FILE: qa/L0_batcher/queue_timeout_test.py
class TestMaxQueueDelayTimeout (line 43) | class TestMaxQueueDelayTimeout(unittest.TestCase):
method setUp (line 44) | def setUp(self):
method _get_inputs (line 48) | def _get_inputs(self, batch_size):
method _generate_callback_and_response_pair (line 56) | def _generate_callback_and_response_pair(self):
method test_default_queue_policy_timeout_prompt_response (line 67) | def test_default_queue_policy_timeout_prompt_response(self):
FILE: qa/L0_batcher/verify_timestamps.py
function verify_timestamps (line 34) | def verify_timestamps(traces, preserve):
FILE: qa/L0_buffer_attributes/buffer_attributes_test.py
class BufferAttributesTest (line 43) | class BufferAttributesTest(tu.TestResultCollector):
method test_buffer_attributes (line 44) | def test_buffer_attributes(self):
FILE: qa/L0_buffer_attributes/models/bls/1/model.py
class TritonPythonModel (line 31) | class TritonPythonModel:
method execute (line 32) | def execute(self, requests):
FILE: qa/L0_buffer_attributes/models/identity/1/model.py
class TritonPythonModel (line 30) | class TritonPythonModel:
method execute (line 31) | def execute(self, requests):
FILE: qa/L0_client_nobatch/client_test.py
class ClientNoBatchTest (line 42) | class ClientNoBatchTest(tu.TestResultCollector):
method test_nobatch_request_for_batching_model (line 43) | def test_nobatch_request_for_batching_model(self):
method test_batch_request_for_nobatching_model (line 94) | def test_batch_request_for_nobatching_model(self):
method test_nobatch_request_for_nonbatching_model (line 146) | def test_nobatch_request_for_nonbatching_model(self):
method test_batch_request_for_batching_model (line 191) | def test_batch_request_for_batching_model(self):
FILE: qa/L0_client_timeout/client_infer_timeout_test.py
class UserData (line 45) | class UserData:
method __init__ (line 46) | def __init__(self):
function callback (line 50) | def callback(user_data, result, error):
class ClientInferTimeoutTest (line 57) | class ClientInferTimeoutTest(tu.TestResultCollector):
method setUp (line 58) | def setUp(self):
method _prepare_request (line 64) | def _prepare_request(self, protocol):
method test_grpc_infer (line 78) | def test_grpc_infer(self):
method test_grpc_async_infer (line 107) | def test_grpc_async_infer(self):
method test_grpc_stream_infer (line 147) | def test_grpc_stream_infer(self):
method test_http_infer (line 188) | def test_http_infer(self):
method test_http_async_infer (line 216) | def test_http_async_infer(self):
FILE: qa/L0_client_timeout/client_non_infer_timeout_test.py
class ClientNonInferTimeoutTest (line 41) | class ClientNonInferTimeoutTest(tu.TestResultCollector):
method setUp (line 42) | def setUp(self):
method test_grpc_server_live (line 49) | def test_grpc_server_live(self):
method test_grpc_is_server_ready (line 60) | def test_grpc_is_server_ready(self):
method test_grpc_is_model_ready (line 71) | def test_grpc_is_model_ready(self):
method test_grpc_get_server_metadata (line 86) | def test_grpc_get_server_metadata(self):
method test_grpc_get_model_metadata (line 96) | def test_grpc_get_model_metadata(self):
method test_grpc_get_model_config (line 109) | def test_grpc_get_model_config(self):
method test_grpc_model_repository_index (line 122) | def test_grpc_model_repository_index(self):
method test_grpc_load_model (line 133) | def test_grpc_load_model(self):
method test_grpc_unload_model (line 150) | def test_grpc_unload_model(self):
method test_grpc_get_inference_statistics (line 165) | def test_grpc_get_inference_statistics(self):
method test_grpc_update_trace_settings (line 178) | def test_grpc_update_trace_settings(self):
method test_grpc_get_trace_settings (line 191) | def test_grpc_get_trace_settings(self):
method test_grpc_update_log_settings (line 204) | def test_grpc_update_log_settings(self):
method test_grpc_get_log_settings (line 218) | def test_grpc_get_log_settings(self):
method test_grpc_get_system_shared_memory_status (line 231) | def test_grpc_get_system_shared_memory_status(self):
method test_grpc_register_system_shared_memory (line 244) | def test_grpc_register_system_shared_memory(self):
method test_grpc_unregister_system_shared_memory (line 272) | def test_grpc_unregister_system_shared_memory(self):
method test_grpc_get_cuda_shared_memory_status (line 285) | def test_grpc_get_cuda_shared_memory_status(self):
method test_grpc_register_cuda_shared_memory (line 296) | def test_grpc_register_cuda_shared_memory(self):
method test_grpc_unregister_cuda_shared_memory (line 327) | def test_grpc_unregister_cuda_shared_memory(self):
FILE: qa/L0_cuda_graph/trt_cuda_graph_test.py
class TrtCudaGraphTest (line 41) | class TrtCudaGraphTest(tu.TestResultCollector):
method setUp (line 44) | def setUp(self):
method _check_infer (line 49) | def _check_infer(self, tensor_shape, batch_size=1):
method _erroneous_infer (line 71) | def _erroneous_infer(self, tensor_shape, batch_size):
method test_fixed_shape (line 117) | def test_fixed_shape(self):
method test_dynamic_shape (line 123) | def test_dynamic_shape(self):
method test_range_fixed_shape (line 130) | def test_range_fixed_shape(self):
method test_range_dynamic_shape (line 140) | def test_range_dynamic_shape(self):
method test_nobatch_fixed_shape (line 155) | def test_nobatch_fixed_shape(self):
FILE: qa/L0_cuda_shared_memory/cuda_shared_memory_test.py
class CudaSharedMemoryTestBase (line 49) | class CudaSharedMemoryTestBase(tu.TestResultCollector):
method setUp (line 52) | def setUp(self):
method tearDown (line 56) | def tearDown(self):
method _setup_client (line 59) | def _setup_client(self):
method _configure_server (line 72) | def _configure_server(
method _cleanup_shm_handles (line 149) | def _cleanup_shm_handles(self):
class CudaSharedMemoryTest (line 155) | class CudaSharedMemoryTest(CudaSharedMemoryTestBase):
method test_invalid_create_shm (line 156) | def test_invalid_create_shm(self):
method test_valid_create_set_register (line 165) | def test_valid_create_set_register(self):
method test_unregister_before_register (line 181) | def test_unregister_before_register(self):
method test_unregister_after_register (line 192) | def test_unregister_after_register(self):
method test_reregister_after_register (line 206) | def test_reregister_after_register(self):
method test_unregister_after_inference (line 227) | def test_unregister_after_inference(self):
method test_register_after_inference (line 253) | def test_register_after_inference(self):
method test_too_big_shm (line 282) | def test_too_big_shm(self):
method test_mixed_raw_shm (line 311) | def test_mixed_raw_shm(self):
method test_unregisterall (line 332) | def test_unregisterall(self):
method test_register_out_of_bound (line 348) | def test_register_out_of_bound(self):
method test_infer_offset_out_of_bound (line 360) | def test_infer_offset_out_of_bound(self):
method test_infer_byte_size_out_of_bound (line 390) | def test_infer_byte_size_out_of_bound(self):
function callback (line 418) | def callback(user_data, result, error):
class TestCudaSharedMemoryUnregister (line 425) | class TestCudaSharedMemoryUnregister(CudaSharedMemoryTestBase):
method _create_request_data (line 426) | def _create_request_data(self):
method _test_unregister_shm_request_pass (line 456) | def _test_unregister_shm_request_pass(self):
method _test_shm_not_found (line 469) | def _test_shm_not_found(self):
method _test_shm_found (line 480) | def _test_shm_found(self):
method test_unregister_shm_during_inference_single_req_http (line 489) | def test_unregister_shm_during_inference_single_req_http(self):
method test_unregister_shm_during_inference_multiple_req_http (line 508) | def test_unregister_shm_during_inference_multiple_req_http(self):
method test_unregister_shm_after_inference_http (line 541) | def test_unregister_shm_after_inference_http(self):
method test_unregister_shm_during_inference_single_req_grpc (line 564) | def test_unregister_shm_during_inference_single_req_grpc(self):
method test_unregister_shm_during_inference_multiple_req_grpc (line 591) | def test_unregister_shm_during_inference_multiple_req_grpc(self):
method test_unregister_shm_after_inference_grpc (line 639) | def test_unregister_shm_after_inference_grpc(self):
class CudaSharedMemoryTestRawHttpRequest (line 671) | class CudaSharedMemoryTestRawHttpRequest(unittest.TestCase):
method setUp (line 672) | def setUp(self):
method tearDown (line 677) | def tearDown(self):
method _generate_mock_base64_raw_handle (line 683) | def _generate_mock_base64_raw_handle(self, data_length):
method _send_register_cshm_request (line 693) | def _send_register_cshm_request(self, raw_handle, device_id, byte_size...
method test_exceeds_cshm_handle_size_limit (line 711) | def test_exceeds_cshm_handle_size_limit(self):
method test_invalid_small_cshm_handle (line 736) | def test_invalid_small_cshm_handle(self):
method test_valid_cshm_handle (line 756) | def test_valid_cshm_handle(self):
FILE: qa/L0_data_compression/validation.py
function generate_compressed_data (line 32) | def generate_compressed_data():
function validate_compressed_data (line 44) | def validate_compressed_data():
FILE: qa/L0_decoupled/decoupled_test.py
class UserData (line 47) | class UserData:
method __init__ (line 48) | def __init__(self):
function callback (line 52) | def callback(user_data, result, error):
class DecoupledTest (line 59) | class DecoupledTest(tu.TestResultCollector):
method setUp (line 60) | def setUp(self):
method _stream_infer_with_params (line 106) | def _stream_infer_with_params(
method _stream_infer (line 171) | def _stream_infer(
method _fan_validate (line 220) | def _fan_validate(self, result_list, data_offset, repeat_count):
method _nested_validate (line 230) | def _nested_validate(self, result_list, data_offset, repeat_count):
method _decoupled_infer (line 246) | def _decoupled_infer(
method test_one_to_none (line 336) | def test_one_to_none(self):
method test_one_to_one (line 348) | def test_one_to_one(self):
method test_one_to_many (line 367) | def test_one_to_many(self):
method test_one_to_multi_many (line 404) | def test_one_to_multi_many(self):
method test_response_order (line 441) | def test_response_order(self):
method _no_streaming_helper (line 506) | def _no_streaming_helper(self, protocol):
method test_no_streaming (line 560) | def test_no_streaming(self):
method test_wrong_shape (line 567) | def test_wrong_shape(self):
class NonDecoupledTest (line 607) | class NonDecoupledTest(tu.TestResultCollector):
method setUp (line 608) | def setUp(self):
method _input_data (line 622) | def _input_data(self, in_value, delay_value, wait_value):
method _async_callback (line 629) | def _async_callback(self, result, error):
method test_grpc (line 635) | def test_grpc(self):
method test_http (line 660) | def test_http(self):
method test_grpc_async (line 685) | def test_grpc_async(self):
method test_grpc_async_cancel (line 739) | def test_grpc_async_cancel(self):
FILE: qa/L0_device_memory_tracker/test.py
class UnifiedClientProxy (line 37) | class UnifiedClientProxy:
method __init__ (line 38) | def __init__(self, client):
method __getattr__ (line 41) | def __getattr__(self, attr):
class MemoryUsageTest (line 53) | class MemoryUsageTest(unittest.TestCase):
method setUp (line 54) | def setUp(self):
method tearDown (line 60) | def tearDown(self):
method report_used_gpu_memory (line 63) | def report_used_gpu_memory(self):
method is_testing_backend (line 67) | def is_testing_backend(self, model_name, backend_name):
method verify_recorded_usage (line 70) | def verify_recorded_usage(self, model_stat):
method test_onnx_http (line 93) | def test_onnx_http(self):
method test_plan_grpc (line 100) | def test_plan_grpc(self):
FILE: qa/L0_dyna_sequence_batcher/dyna_sequence_batcher_test.py
class DynaSequenceBatcherTest (line 64) | class DynaSequenceBatcherTest(su.SequenceBatcherTestUtil):
method get_datatype (line 65) | def get_datatype(self, trial):
method get_expected_result (line 68) | def get_expected_result(self, expected_result, corrid, value, trial, f...
method get_expected_result_implicit (line 90) | def get_expected_result_implicit(
method test_simple_sequence (line 95) | def test_simple_sequence(self):
method test_length1_sequence (line 155) | def test_length1_sequence(self):
method _multi_sequence_impl (line 203) | def _multi_sequence_impl(
method test_multi_sequence (line 405) | def test_multi_sequence(self):
method test_multi_parallel_sequence (line 410) | def test_multi_parallel_sequence(self):
method test_multi_sequence_different_shape (line 415) | def test_multi_sequence_different_shape(self):
method test_multi_sequence_different_shape_allow_ragged (line 423) | def test_multi_sequence_different_shape_allow_ragged(self):
method test_backlog (line 432) | def test_backlog(self):
method test_backlog_fill (line 632) | def test_backlog_fill(self):
method test_backlog_fill_no_end (line 852) | def test_backlog_fill_no_end(self):
method test_backlog_sequence_timeout (line 1080) | def test_backlog_sequence_timeout(self):
FILE: qa/L0_grpc/client_plugin_models/client_plugin_test/1/model.py
class TritonPythonModel (line 33) | class TritonPythonModel:
method execute (line 34) | def execute(self, requests):
FILE: qa/L0_grpc/grpc_basic_auth_test.py
class GRPCBasicAuthTest (line 39) | class GRPCBasicAuthTest(tu.TestResultCollector):
method setUp (line 40) | def setUp(self):
method test_client_call (line 45) | def test_client_call(self):
method tearDown (line 48) | def tearDown(self):
class GRPCBasicAuthAsyncTest (line 52) | class GRPCBasicAuthAsyncTest(unittest.IsolatedAsyncioTestCase):
method asyncSetUp (line 53) | async def asyncSetUp(self):
method test_client_call (line 58) | async def test_client_call(self):
method asyncTearDown (line 61) | async def asyncTearDown(self):
FILE: qa/L0_grpc/grpc_client_plugin_test.py
class TestPlugin (line 43) | class TestPlugin(InferenceServerClientPlugin):
method __init__ (line 44) | def __init__(self, headers):
method __call__ (line 47) | def __call__(self, request):
function prepare_infer_inputs (line 51) | def prepare_infer_inputs(headers):
class GRPCClientPluginAsyncTest (line 66) | class GRPCClientPluginAsyncTest(unittest.IsolatedAsyncioTestCase):
method asyncSetUp (line 67) | async def asyncSetUp(self):
method test_simple_infer (line 72) | async def test_simple_infer(self):
method asyncTearDown (line 86) | async def asyncTearDown(self):
class GRPCClientPluginTest (line 90) | class GRPCClientPluginTest(tu.TestResultCollector):
method setUp (line 91) | def setUp(self):
method test_simple_infer (line 96) | def test_simple_infer(self):
method tearDown (line 115) | def tearDown(self):
FILE: qa/L0_grpc/python_grpc_aio_test.py
class TestGrpcAioClient (line 34) | class TestGrpcAioClient(unittest.IsolatedAsyncioTestCase):
method setUp (line 37) | def setUp(self):
method asyncTearDown (line 40) | async def asyncTearDown(self):
method test_is_server_live (line 43) | async def test_is_server_live(self):
method test_is_server_ready (line 47) | async def test_is_server_ready(self):
method test_is_model_ready (line 51) | async def test_is_model_ready(self):
method test_get_server_metadata (line 55) | async def test_get_server_metadata(self):
method test_get_model_metadata (line 62) | async def test_get_model_metadata(self):
method test_get_model_config (line 66) | async def test_get_model_config(self):
method test_get_model_repository_index (line 70) | async def test_get_model_repository_index(self):
method test_load_model (line 74) | async def test_load_model(self):
method test_unload_model (line 81) | async def test_unload_model(self):
method test_get_inference_statistics (line 88) | async def test_get_inference_statistics(self):
method test_update_trace_settings (line 91) | async def test_update_trace_settings(self):
method test_get_trace_settings (line 94) | async def test_get_trace_settings(self):
method test_get_system_shared_memory_status (line 97) | async def test_get_system_shared_memory_status(self):
method test_register_system_shared_memory (line 100) | async def test_register_system_shared_memory(self):
method test_unregister_system_shared_memory (line 109) | async def test_unregister_system_shared_memory(self):
method test_get_cuda_shared_memory_status (line 112) | async def test_get_cuda_shared_memory_status(self):
method test_register_cuda_shared_memory (line 115) | async def test_register_cuda_shared_memory(self):
method test_unregister_cuda_shared_memory (line 122) | async def test_unregister_cuda_shared_memory(self):
FILE: qa/L0_grpc/python_unit_test.py
class UserData (line 40) | class UserData:
method __init__ (line 41) | def __init__(self):
function callback (line 45) | def callback(user_data, result, error):
class RestrictedProtocolTest (line 52) | class RestrictedProtocolTest(unittest.TestCase):
method setUp (line 53) | def setUp(self):
method test_sanity (line 59) | def test_sanity(self):
method test_model_repository (line 68) | def test_model_repository(self):
method test_health (line 83) | def test_health(self):
method test_infer (line 90) | def test_infer(self):
method test_stream_infer (line 112) | def test_stream_infer(self):
FILE: qa/L0_grpc_state_cleanup/cleanup_test.py
class UserData (line 46) | class UserData:
method __init__ (line 47) | def __init__(self):
function callback (line 51) | def callback(user_data, result, error):
class CleanUpTest (line 64) | class CleanUpTest(tu.TestResultCollector):
method setUp (line 67) | def setUp(self):
method _prepare_inputs_and_outputs (line 72) | def _prepare_inputs_and_outputs(self, kind):
method _simple_infer (line 93) | def _simple_infer(
method _stream_infer_with_params (line 147) | def _stream_infer_with_params(
method _stream_infer (line 223) | def _stream_infer(
method _streaming_infer (line 287) | def _streaming_infer(
method _decoupled_infer (line 333) | def _decoupled_infer(
method test_simple_infer (line 424) | def test_simple_infer(self):
method test_simple_infer_cancellation (line 429) | def test_simple_infer_cancellation(self):
method test_simple_infer_timeout (line 437) | def test_simple_infer_timeout(self):
method test_simple_infer_error_status (line 444) | def test_simple_infer_error_status(self):
method test_simple_infer_shutdownserver (line 454) | def test_simple_infer_shutdownserver(self):
method test_streaming_infer (line 464) | def test_streaming_infer(self):
method test_streaming_cancellation (line 470) | def test_streaming_cancellation(self):
method test_streaming_timeout (line 478) | def test_streaming_timeout(self):
method test_streaming_error_status (line 485) | def test_streaming_error_status(self):
method test_streaming_infer_shutdownserver (line 502) | def test_streaming_infer_shutdownserver(self):
method test_decoupled_infer (line 517) | def test_decoupled_infer(self):
method test_decoupled_cancellation (line 524) | def test_decoupled_cancellation(self):
method test_decoupled_timeout (line 534) | def test_decoupled_timeout(self):
method test_decoupled_error_status (line 543) | def test_decoupled_error_status(self):
method test_decoupled_infer_shutdownserver (line 560) | def test_decoupled_infer_shutdownserver(self):
method test_decoupled_infer_with_params_shutdownserver (line 574) | def test_decoupled_infer_with_params_shutdownserver(self):
method test_decoupled_infer_complete (line 588) | def test_decoupled_infer_complete(self):
method test_non_decoupled_streaming_multi_response (line 597) | def test_non_decoupled_streaming_multi_response(self):
FILE: qa/L0_http/generate_endpoint_test.py
class GenerateEndpointTest (line 42) | class GenerateEndpointTest(tu.TestResultCollector):
method setUp (line 43) | def setUp(self):
method _get_infer_url (line 46) | def _get_infer_url(self, model_name, route):
method generate_stream (line 49) | def generate_stream(self, model_name, inputs, stream=False):
method generate (line 63) | def generate(self, model_name, inputs):
method generate_expect_failure (line 69) | def generate_expect_failure(self, model_name, inputs, msg):
method generate_stream_expect_failure (line 83) | def generate_stream_expect_failure(self, model_name, inputs, msg):
method generate_stream_expect_success (line 94) | def generate_stream_expect_success(
method check_sse_responses (line 101) | def check_sse_responses(self, res, expected_res):
method test_generate (line 130) | def test_generate(self):
method test_generate_with_all_inputs (line 145) | def test_generate_with_all_inputs(self):
method test_request_id (line 160) | def test_request_id(self):
method test_generate_stream (line 203) | def test_generate_stream(self):
method test_streaming (line 210) | def test_streaming(self):
method test_missing_inputs (line 224) | def test_missing_inputs(self):
method test_invalid_input_types (line 260) | def test_invalid_input_types(self):
method test_duplicate_inputs (line 277) | def test_duplicate_inputs(self):
method test_generate_stream_response_error (line 302) | def test_generate_stream_response_error(self):
method test_race_condition (line 323) | def test_race_condition(self):
method test_one_response (line 352) | def test_one_response(self):
method test_zero_response (line 376) | def test_zero_response(self):
method test_many_response (line 390) | def test_many_response(self):
method test_complex_schema (line 404) | def test_complex_schema(self):
method test_close_connection_during_streaming (line 433) | def test_close_connection_during_streaming(self):
method test_parameters (line 445) | def test_parameters(self):
method test_0_dimension_output (line 483) | def test_0_dimension_output(self):
FILE: qa/L0_http/http_basic_auth_test.py
class HTTPBasicAuthTest (line 39) | class HTTPBasicAuthTest(tu.TestResultCollector):
method setUp (line 40) | def setUp(self):
method test_client_call (line 45) | def test_client_call(self):
method tearDown (line 48) | def tearDown(self):
class HTTPBasicAuthAsyncTest (line 52) | class HTTPBasicAuthAsyncTest(unittest.IsolatedAsyncioTestCase):
method asyncSetUp (line 53) | async def asyncSetUp(self):
method test_client_call (line 58) | async def test_client_call(self):
method asyncTearDown (line 61) | async def asyncTearDown(self):
FILE: qa/L0_http/http_client_plugin_test.py
class TestPlugin (line 44) | class TestPlugin(InferenceServerClientPlugin):
method __init__ (line 45) | def __init__(self, headers):
method __call__ (line 48) | def __call__(self, request):
class HTTPClientPluginAsyncTest (line 52) | class HTTPClientPluginAsyncTest(unittest.IsolatedAsyncioTestCase):
method asyncSetUp (line 53) | async def asyncSetUp(self):
method test_server_is_live (line 58) | async def test_server_is_live(self):
method test_simple_infer (line 76) | async def test_simple_infer(self):
method asyncTearDown (line 111) | async def asyncTearDown(self):
class HTTPClientPluginTest (line 115) | class HTTPClientPluginTest(tu.TestResultCollector):
method setUp (line 116) | def setUp(self):
method test_server_is_live (line 124) | def test_server_is_live(self):
method test_simple_infer (line 138) | def test_simple_infer(self):
method tearDown (line 170) | def tearDown(self):
FILE: qa/L0_http/http_input_size_limit_test.py
class InferSizeLimitTest (line 59) | class InferSizeLimitTest(tu.TestResultCollector):
method _get_infer_url (line 60) | def _get_infer_url(self, model_name):
method test_default_limit_raw_binary (line 63) | def test_default_limit_raw_binary(self):
method test_default_limit_json (line 129) | def test_default_limit_json(self):
method test_large_input_raw_binary (line 216) | def test_large_input_raw_binary(self):
method test_large_input_json (line 284) | def test_large_input_json(self):
method test_large_string_in_json (line 371) | def test_large_string_in_json(self):
method _create_compressed_payload (line 420) | def _create_compressed_payload(self, target_size):
method test_default_limit_compressed (line 446) | def test_default_limit_compressed(self):
method test_large_input_compressed (line 528) | def test_large_input_compressed(self):
FILE: qa/L0_http/http_request_many_chunks.py
class HTTPRequestManyChunksTest (line 32) | class HTTPRequestManyChunksTest(unittest.TestCase):
method setUp (line 33) | def setUp(self):
method send_chunked_request (line 44) | def send_chunked_request(
method test_infer (line 83) | def test_infer(self):
method test_registry_index (line 95) | def test_registry_index(self):
method test_model_control (line 102) | def test_model_control(self):
method test_trace (line 115) | def test_trace(self):
method test_logging (line 124) | def test_logging(self):
method test_system_shm_register (line 131) | def test_system_shm_register(self):
method test_cuda_shm_register (line 138) | def test_cuda_shm_register(self):
method test_generate (line 145) | def test_generate(self):
FILE: qa/L0_http/http_restricted_api_test.py
class RestrictedAPITest (line 39) | class RestrictedAPITest(unittest.TestCase):
method setUp (line 40) | def setUp(self):
method test_sanity (line 45) | def test_sanity(self):
method test_model_repository (line 54) | def test_model_repository(self):
method test_metadata (line 67) | def test_metadata(self):
method test_infer (line 72) | def test_infer(self):
FILE: qa/L0_http/http_test.py
class HttpTest (line 45) | class HttpTest(tu.TestResultCollector):
method _get_infer_url (line 46) | def _get_infer_url(self, model_name):
method _get_load_model_url (line 49) | def _get_load_model_url(self, model_name):
method _raw_binary_helper (line 52) | def _raw_binary_helper(
method test_raw_binary (line 73) | def test_raw_binary(self):
method test_raw_binary_longer (line 78) | def test_raw_binary_longer(self):
method test_byte (line 84) | def test_byte(self):
method test_byte_too_many_elements (line 105) | def test_byte_too_many_elements(self):
method test_multi_variable_dimensions (line 124) | def test_multi_variable_dimensions(self):
method test_multi_inputs (line 145) | def test_multi_inputs(self):
method test_content_encoding_chunked_manually (line 172) | def test_content_encoding_chunked_manually(self):
method test_content_encoding_unsupported_client (line 192) | def test_content_encoding_unsupported_client(self):
method test_descriptive_status_code (line 215) | def test_descriptive_status_code(self):
method test_buffer_size_overflow (line 239) | def test_buffer_size_overflow(self):
method test_negative_dimensions (line 305) | def test_negative_dimensions(self):
method test_loading_large_invalid_model (line 337) | def test_loading_large_invalid_model(self):
method test_json_recursion_depth_limit (line 377) | def test_json_recursion_depth_limit(self):
FILE: qa/L0_http/python_http_aio_test.py
class TestHttpAioClient (line 34) | class TestHttpAioClient(unittest.IsolatedAsyncioTestCase):
method asyncSetUp (line 37) | async def asyncSetUp(self):
method asyncTearDown (line 40) | async def asyncTearDown(self):
method test_is_server_live (line 43) | async def test_is_server_live(self):
method test_is_server_ready (line 47) | async def test_is_server_ready(self):
method test_is_model_ready (line 51) | async def test_is_model_ready(self):
method test_get_server_metadata (line 55) | async def test_get_server_metadata(self):
method test_get_model_metadata (line 59) | async def test_get_model_metadata(self):
method test_get_model_config (line 63) | async def test_get_model_config(self):
method test_get_model_repository_index (line 67) | async def test_get_model_repository_index(self):
method test_load_model (line 71) | async def test_load_model(self):
method test_unload_model (line 78) | async def test_unload_model(self):
method test_get_inference_statistics (line 85) | async def test_get_inference_statistics(self):
method test_update_trace_settings (line 88) | async def test_update_trace_settings(self):
method test_get_trace_settings (line 91) | async def test_get_trace_settings(self):
method test_get_system_shared_memory_status (line 94) | async def test_get_system_shared_memory_status(self):
method test_register_system_shared_memory (line 97) | async def test_register_system_shared_memory(self):
method test_unregister_system_shared_memory (line 101) | async def test_unregister_system_shared_memory(self):
method test_get_cuda_shared_memory_status (line 104) | async def test_get_cuda_shared_memory_status(self):
method test_register_cuda_shared_memory (line 107) | async def test_register_cuda_shared_memory(self):
method test_unregister_cuda_shared_memory (line 111) | async def test_unregister_cuda_shared_memory(self):
FILE: qa/L0_http_fuzz/fuzztest.py
class FuzzTest (line 42) | class FuzzTest(tu.TestResultCollector):
method _run_fuzz (line 43) | def _run_fuzz(self, url, logger):
method test_failures_from_db (line 66) | def test_failures_from_db(self):
FILE: qa/L0_implicit_state/implicit_state.py
class ImplicitStateTest (line 44) | class ImplicitStateTest(tu.TestResultCollector):
method test_no_implicit_state (line 45) | def test_no_implicit_state(self):
method test_wrong_implicit_state_name (line 67) | def test_wrong_implicit_state_name(self):
method test_implicit_state_single_buffer (line 86) | def test_implicit_state_single_buffer(self):
method test_implicit_state_growable_memory (line 110) | def test_implicit_state_growable_memory(self):
method test_no_update (line 160) | def test_no_update(self):
method test_request_output_not_allowed (line 192) | def test_request_output_not_allowed(self):
method test_request_output (line 229) | def test_request_output(self):
FILE: qa/L0_infer/infer_test.py
class InferTest (line 64) | class InferTest(tu.TestResultCollector):
method _full_exact (line 65) | def _full_exact(
method test_raw_uuu (line 321) | def test_raw_uuu(self):
method test_raw_bbb (line 326) | def test_raw_bbb(self):
method test_raw_sss (line 331) | def test_raw_sss(self):
method test_raw_iii (line 336) | def test_raw_iii(self):
method test_raw_lll (line 341) | def test_raw_lll(self):
method test_raw_hhh (line 346) | def test_raw_hhh(self):
method test_raw_fff (line 356) | def test_raw_fff(self):
method test_raw_hff (line 366) | def test_raw_hff(self):
method test_raw_bii (line 376) | def test_raw_bii(self):
method test_raw_ibb (line 381) | def test_raw_ibb(self):
method test_raw_ibs (line 386) | def test_raw_ibs(self):
method test_raw_fuu (line 391) | def test_raw_fuu(self):
method test_raw_uff (line 401) | def test_raw_uff(self):
method test_raw_fuh (line 411) | def test_raw_fuh(self):
method test_raw_iff (line 421) | def test_raw_iff(self):
method test_raw_fii (line 431) | def test_raw_fii(self):
method test_raw_ihs (line 441) | def test_raw_ihs(self):
method test_raw_ooo (line 451) | def test_raw_ooo(self):
method test_raw_oii (line 461) | def test_raw_oii(self):
method test_raw_oio (line 471) | def test_raw_oio(self):
method test_raw_ooi (line 481) | def test_raw_ooi(self):
method test_raw_ioo (line 491) | def test_raw_ioo(self):
method test_raw_iio (line 501) | def test_raw_iio(self):
method test_raw_ioi (line 511) | def test_raw_ioi(self):
method test_class_bbb (line 524) | def test_class_bbb(self):
method test_class_sss (line 534) | def test_class_sss(self):
method test_class_iii (line 544) | def test_class_iii(self):
method test_class_lll (line 554) | def test_class_lll(self):
method test_class_fff (line 564) | def test_class_fff(self):
method test_class_iff (line 574) | def test_class_iff(self):
method test_mix_bbb (line 584) | def test_mix_bbb(self):
method test_mix_sss (line 594) | def test_mix_sss(self):
method test_mix_iii (line 604) | def test_mix_iii(self):
method test_mix_lll (line 614) | def test_mix_lll(self):
method test_mix_fff (line 624) | def test_mix_fff(self):
method test_mix_iff (line 634) | def test_mix_iff(self):
method test_raw_version_latest_1 (line 646) | def test_raw_version_latest_1(self):
method test_raw_version_latest_2 (line 713) | def test_raw_version_latest_2(self):
method test_raw_version_all (line 774) | def test_raw_version_all(self):
method test_raw_version_specific_1 (line 829) | def test_raw_version_specific_1(self):
method test_raw_version_specific_1_3 (line 896) | def test_raw_version_specific_1_3(self):
method test_ensemble_mix_platform (line 963) | def test_ensemble_mix_platform(self):
method test_ensemble_mix_type (line 984) | def test_ensemble_mix_type(self):
method test_ensemble_mix_ensemble (line 1002) | def test_ensemble_mix_ensemble(self):
method test_ensemble_mix_batch_nobatch (line 1025) | def test_ensemble_mix_batch_nobatch(self):
method test_ensemble_label_lookup (line 1074) | def test_ensemble_label_lookup(self):
FILE: qa/L0_infer_reshape/infer_reshape_test.py
class InferReshapeTest (line 46) | class InferReshapeTest(tu.TestResultCollector):
method _full_reshape (line 47) | def _full_reshape(self, dtype, input_shapes, output_shapes=None, no_ba...
method _trt_reshape (line 201) | def _trt_reshape(self, dtype, input_shapes, output_shapes=None, no_bat...
method test_ff1 (line 248) | def test_ff1(self):
method test_ff2 (line 251) | def test_ff2(self):
method test_ff3 (line 255) | def test_ff3(self):
method test_ff4 (line 258) | def test_ff4(self):
method test_ii1 (line 270) | def test_ii1(self):
method test_ii2 (line 273) | def test_ii2(self):
method test_ii3 (line 278) | def test_ii3(self):
method test_oo1 (line 281) | def test_oo1(self):
FILE: qa/L0_infer_variable/infer_variable_test.py
class InferVariableTest (line 46) | class InferVariableTest(tu.TestResultCollector):
method _full_exact (line 47) | def _full_exact(
method test_raw_fff (line 229) | def test_raw_fff(self):
method test_raw_fii (line 232) | def test_raw_fii(self):
method test_raw_fll (line 235) | def test_raw_fll(self):
method test_raw_fil (line 238) | def test_raw_fil(self):
method test_raw_ffi (line 243) | def test_raw_ffi(self):
method test_raw_iii (line 246) | def test_raw_iii(self):
method test_faw_iif (line 249) | def test_faw_iif(self):
method test_raw_ooo (line 254) | def test_raw_ooo(self):
method test_raw_oii (line 259) | def test_raw_oii(self):
method test_raw_ooi (line 262) | def test_raw_ooi(self):
method test_raw_oio (line 267) | def test_raw_oio(self):
method test_class_fff (line 272) | def test_class_fff(self):
method test_class_fii (line 284) | def test_class_fii(self):
method test_class_fll (line 296) | def test_class_fll(self):
method test_class_fil (line 308) | def test_class_fil(self):
method test_class_ffi (line 320) | def test_class_ffi(self):
method test_class_iii (line 332) | def test_class_iii(self):
method test_class_iif (line 344) | def test_class_iif(self):
method test_mix_ffi (line 356) | def test_mix_ffi(self):
method test_mix_iii (line 368) | def test_mix_iii(self):
method test_mix_iif (line 380) | def test_mix_iif(self):
FILE: qa/L0_infer_zero/infer_zero_test.py
class InferZeroTest (line 51) | class InferZeroTest(tu.TestResultCollector):
method _full_zero (line 52) | def _full_zero(self, dtype, shapes):
method test_ff1_sanity (line 130) | def test_ff1_sanity(self):
method test_ff1 (line 140) | def test_ff1(self):
method test_ff3_sanity (line 150) | def test_ff3_sanity(self):
method test_ff3_0 (line 166) | def test_ff3_0(self):
method test_ff3_1 (line 182) | def test_ff3_1(self):
method test_ff3_2 (line 198) | def test_ff3_2(self):
method test_ff3_3 (line 214) | def test_ff3_3(self):
method test_ff3_4 (line 230) | def test_ff3_4(self):
method test_hh1_sanity (line 246) | def test_hh1_sanity(self):
method test_hh1_0 (line 249) | def test_hh1_0(self):
method test_hh1_1 (line 252) | def test_hh1_1(self):
method test_hh1_2 (line 255) | def test_hh1_2(self):
method test_hh3_sanity (line 258) | def test_hh3_sanity(self):
method test_hh3_0 (line 261) | def test_hh3_0(self):
method test_hh3_1 (line 264) | def test_hh3_1(self):
method test_hh3_2 (line 267) | def test_hh3_2(self):
method test_hh3_3 (line 270) | def test_hh3_3(self):
method test_hh3_4 (line 273) | def test_hh3_4(self):
method test_oo1_sanity (line 276) | def test_oo1_sanity(self):
method test_oo1 (line 286) | def test_oo1(self):
method test_oo3_sanity (line 296) | def test_oo3_sanity(self):
method test_oo3_0 (line 299) | def test_oo3_0(self):
method test_oo3_1 (line 302) | def test_oo3_1(self):
method test_oo3_2 (line 305) | def test_oo3_2(self):
method test_oo3_3 (line 308) | def test_oo3_3(self):
method test_oo3_4 (line 311) | def test_oo3_4(self):
method test_bb1_sanity (line 314) | def test_bb1_sanity(self):
method test_bb1_0 (line 324) | def test_bb1_0(self):
FILE: qa/L0_input_validation/input_validation_test.py
class InputValTest (line 41) | class InputValTest(unittest.TestCase):
method test_input_validation_required_empty (line 42) | def test_input_validation_required_empty(self):
method test_input_validation_optional_empty (line 56) | def test_input_validation_optional_empty(self):
method test_input_validation_required_missing (line 70) | def test_input_validation_required_missing(self):
method test_input_validation_optional (line 88) | def test_input_validation_optional(self):
method test_input_validation_all_optional (line 107) | def test_input_validation_all_optional(self):
class InputShapeTest (line 118) | class InputShapeTest(unittest.TestCase):
method test_input_shape_validation (line 119) | def test_input_shape_validation(self):
method test_input_string_shape_validation (line 155) | def test_input_string_shape_validation(self):
method test_wrong_input_shape_tensor_size (line 215) | def test_wrong_input_shape_tensor_size(self):
class ModelNameValidationTest (line 291) | class ModelNameValidationTest(unittest.TestCase):
method test_model_name_invalid_load (line 306) | def test_model_name_invalid_load(self):
method test_model_name_empty_load (line 318) | def test_model_name_empty_load(self):
method test_model_name_whitespace_only_load (line 327) | def test_model_name_whitespace_only_load(self):
method test_model_name_invalid_unload (line 339) | def test_model_name_invalid_unload(self):
method test_model_name_valid (line 352) | def test_model_name_valid(self):
FILE: qa/L0_input_validation/models/input_all_optional/1/model.py
class TritonPythonModel (line 33) | class TritonPythonModel:
method initialize (line 34) | def initialize(self, args):
method execute (line 37) | def execute(self, requests):
FILE: qa/L0_input_validation/models/input_all_required/1/model.py
class TritonPythonModel (line 33) | class TritonPythonModel:
method initialize (line 34) | def initialize(self, args):
method execute (line 37) | def execute(self, requests):
FILE: qa/L0_input_validation/models/input_optional/1/model.py
class TritonPythonModel (line 33) | class TritonPythonModel:
method initialize (line 34) | def initialize(self, args):
method execute (line 37) | def execute(self, requests):
FILE: qa/L0_io/gen_libtorch_model.py
class SumModule (line 32) | class SumModule(nn.Module):
method __init__ (line 33) | def __init__(self, device):
method forward (line 37) | def forward(self, INPUT0, INPUT1):
class DiffModule (line 48) | class DiffModule(nn.Module):
method __init__ (line 49) | def __init__(self, device):
method forward (line 53) | def forward(self, INPUT0, INPUT1):
class TestModel (line 64) | class TestModel(nn.Module):
method __init__ (line 65) | def __init__(self, device0, device1):
method forward (line 73) | def forward(self, INPUT0, INPUT1):
FILE: qa/L0_iterative_sequence/iterative_sequence_e2e.py
class UserData (line 73) | class UserData:
method __init__ (line 74) | def __init__(self):
function callback (line 78) | def callback(user_data, result, error):
class IterativeSequenceTest (line 85) | class IterativeSequenceTest(tu.TestResultCollector):
method setUp (line 86) | def setUp(self):
method test_generate_stream (line 91) | def test_generate_stream(self):
method test_grpc_stream (line 106) | def test_grpc_stream(
method test_backlog_fill (line 136) | def test_backlog_fill(self):
method test_reschedule_error (line 144) | def test_reschedule_error(self):
method test_unsupported_sequence_scheduler (line 163) | def test_unsupported_sequence_scheduler(self):
method test_unsupported_dynamic_scheduler (line 186) | def test_unsupported_dynamic_scheduler(self):
FILE: qa/L0_java_memory_growth/MemoryGrowthTest.java
class MemoryGrowthTest (line 36) | public class MemoryGrowthTest {
method FAIL (line 42) | static void FAIL(String MSG)
method FAIL_IF_ERR (line 48) | static void FAIL_IF_ERR(TRITONSERVER_Error err__, String MSG)
class TRITONSERVER_ServerDeleter (line 65) | static class TRITONSERVER_ServerDeleter extends TRITONSERVER_Server {
method TRITONSERVER_ServerDeleter (line 66) | public TRITONSERVER_ServerDeleter(TRITONSERVER_Server p)
class DeleteDeallocator (line 71) | protected static class DeleteDeallocator
method DeleteDeallocator (line 73) | DeleteDeallocator(Pointer p) { super(p); }
method deallocate (line 74) | @Override public void deallocate() { TRITONSERVER_ServerDelete(thi...
method Usage (line 78) | static void Usage(String msg)
class ResponseAlloc (line 101) | static class ResponseAlloc extends TRITONSERVER_ResponseAllocatorAlloc...
method call (line 102) | @Override
class ResponseRelease (line 141) | static class ResponseRelease
method call (line 143) | @Override
class InferRequestComplete (line 162) | static class InferRequestComplete
method call (line 164) | @Override
class InferResponseComplete (line 172) | static class InferResponseComplete
method call (line 174) | @Override
method ParseModelMetadata (line 194) | static TRITONSERVER_Error ParseModelMetadata(
method GenerateInputData (line 238) | static void GenerateInputData(
method GenerateInputData (line 249) | static void GenerateInputData(
method CompareResult (line 260) | static void CompareResult(
method CompareResult (line 274) | static void CompareResult(
method Check (line 288) | static void Check(
method ValidateMemoryGrowth (line 381) | static boolean ValidateMemoryGrowth(
method RunInference (line 443) | static void RunInference(
method main (line 667) | public static void main(String[] args) throws Exception
FILE: qa/L0_java_resnet/ResnetTest.java
class ResnetTest (line 36) | public class ResnetTest {
type Backend (line 44) | private enum Backend {
method FAIL (line 51) | static void FAIL(String MSG)
method FAIL_IF_ERR (line 57) | static void FAIL_IF_ERR(TRITONSERVER_Error err__, String MSG)
class TRITONSERVER_ServerDeleter (line 71) | static class TRITONSERVER_ServerDeleter extends TRITONSERVER_Server {
method TRITONSERVER_ServerDeleter (line 72) | public TRITONSERVER_ServerDeleter(TRITONSERVER_Server p)
class DeleteDeallocator (line 77) | protected static class DeleteDeallocator
method DeleteDeallocator (line 79) | DeleteDeallocator(Pointer p) { super(p); }
method deallocate (line 80) | @Override public void deallocate() { TRITONSERVER_ServerDelete(thi...
method Usage (line 84) | static void Usage(String msg)
class ResponseAlloc (line 103) | static class ResponseAlloc extends TRITONSERVER_ResponseAllocatorAlloc...
method call (line 104) | @Override
class ResponseRelease (line 150) | static class ResponseRelease
method call (line 152) | @Override
class InferRequestComplete (line 172) | static class InferRequestComplete
method call (line 174) | @Override
class InferResponseComplete (line 182) | static class InferResponseComplete
method call (line 184) | @Override
method GenerateInputData (line 204) | static void GenerateInputData(FloatPointer[] input_data)
method AreValidResults (line 213) | static boolean AreValidResults(
method Check (line 233) | static void Check(
method PerformInference (line 337) | static void PerformInference(
method main (line 488) | public static void main(String[] args) throws Exception
FILE: qa/L0_java_sequence_batcher/SequenceTest.java
class SequenceTest (line 36) | public class SequenceTest {
method FAIL (line 38) | static void FAIL(String MSG)
method FAIL_IF_ERR (line 44) | static void FAIL_IF_ERR(TRITONSERVER_Error err__, String MSG)
class TRITONSERVER_ServerDeleter (line 57) | static class TRITONSERVER_ServerDeleter extends TRITONSERVER_Server {
method TRITONSERVER_ServerDeleter (line 58) | public TRITONSERVER_ServerDeleter(TRITONSERVER_Server p)
class DeleteDeallocator (line 63) | protected static class DeleteDeallocator
method DeleteDeallocator (line 65) | DeleteDeallocator(Pointer p) { super(p); }
method deallocate (line 66) | @Override public void deallocate() { TRITONSERVER_ServerDelete(thi...
method Usage (line 70) | static void Usage(String msg)
class ResponseAlloc (line 85) | static class ResponseAlloc extends TRITONSERVER_ResponseAllocatorAlloc...
method call (line 86) | @Override
class ResponseRelease (line 130) | static class ResponseRelease
method call (line 132) | @Override
class InferRequestComplete (line 156) | static class InferRequestComplete
method call (line 158) | @Override
class InferResponseComplete (line 166) | static class InferResponseComplete
method call (line 168) | @Override
method ParseModelMetadata (line 188) | static TRITONSERVER_Error ParseModelMetadata(
method SetSequenceMetadata (line 228) | static void SetSequenceMetadata(
method GetExpectedResult (line 250) | static int GetExpectedResult(
method Check (line 267) | static void Check(
method main (line 357) | public static void main(String[] args) throws Exception
FILE: qa/L0_large_payload/large_payload_test.py
class LargePayLoadTest (line 43) | class LargePayLoadTest(tu.TestResultCollector):
method setUp (line 44) | def setUp(self):
method _test_helper (line 74) | def _test_helper(
method test_onnx (line 137) | def test_onnx(self):
method test_python (line 143) | def test_python(self):
method test_plan (line 149) | def test_plan(self):
method test_libtorch (line 155) | def test_libtorch(self):
method test_custom (line 161) | def test_custom(self):
FILE: qa/L0_libtorch_instance_group_kind_model/client.py
class InferTest (line 44) | class InferTest(tu.TestResultCollector):
method test_infer (line 45) | def test_infer(self):
FILE: qa/L0_libtorch_instance_group_kind_model/gen_models.py
class SumModule (line 32) | class SumModule(nn.Module):
method __init__ (line 33) | def __init__(self, device):
method forward (line 37) | def forward(self, INPUT0, INPUT1):
class DiffModule (line 48) | class DiffModule(nn.Module):
method __init__ (line 49) | def __init__(self, device):
method forward (line 53) | def forward(self, INPUT0, INPUT1):
class TestModel (line 64) | class TestModel(nn.Module):
method __init__ (line 65) | def __init__(self, device0, device1):
method forward (line 73) | def forward(self, INPUT0, INPUT1):
FILE: qa/L0_libtorch_io_names/io_names_client.py
class IONamingConvention (line 40) | class IONamingConvention(tu.TestResultCollector):
method _infer_helper (line 41) | def _infer_helper(self, model_name, io_names, reversed_order=False):
method test_io_index (line 85) | def test_io_index(self):
method test_output_index (line 89) | def test_output_index(self):
method test_no_output_index (line 93) | def test_no_output_index(self):
method test_no_arguments_no_output_index (line 97) | def test_no_arguments_no_output_index(self):
method test_mix_index (line 101) | def test_mix_index(self):
method test_mix_arguments (line 105) | def test_mix_arguments(self):
method test_mix_arguments_index (line 109) | def test_mix_arguments_index(self):
method test_unordered_index (line 113) | def test_unordered_index(self):
FILE: qa/L0_libtorch_shared_weights/libtorch_shared_weights_test.py
class SharedWeightsTest (line 43) | class SharedWeightsTest(tu.TestResultCollector):
method _full_exact (line 44) | def _full_exact(self, model_name, request_concurrency, shape):
method test_pytorch_identity_model (line 68) | def test_pytorch_identity_model(self):
FILE: qa/L0_lifecycle/lifecycle_test.py
class LifeCycleTest (line 55) | class LifeCycleTest(tu.TestResultCollector):
method _infer_success_models (line 56) | def _infer_success_models(
method _infer_success_identity (line 91) | def _infer_success_identity(self, model_base, versions, tensor_dtype, ...
method _get_client (line 121) | def _get_client(self, use_grpc=False):
method _async_load (line 132) | def _async_load(self, model_name, use_grpc):
method test_parse_error_noexit (line 139) | def test_parse_error_noexit(self):
method test_parse_error_modelfail (line 168) | def test_parse_error_modelfail(self):
method test_parse_error_modelfail_nostrict (line 231) | def test_parse_error_modelfail_nostrict(self):
method test_parse_error_no_model_config (line 294) | def test_parse_error_no_model_config(self):
method test_init_error_modelfail (line 346) | def test_init_error_modelfail(self):
method test_parse_error_model_no_version (line 392) | def test_parse_error_model_no_version(self):
method test_parse_ignore_zero_prefixed_version (line 465) | def test_parse_ignore_zero_prefixed_version(self):
method test_parse_ignore_non_intergral_version (line 499) | def test_parse_ignore_non_intergral_version(self):
method test_dynamic_model_load_unload (line 533) | def test_dynamic_model_load_unload(self):
method test_dynamic_model_load_unload_disabled (line 752) | def test_dynamic_model_load_unload_disabled(self):
method test_dynamic_version_load_unload (line 848) | def test_dynamic_version_load_unload(self):
method test_dynamic_version_load_unload_disabled (line 984) | def test_dynamic_version_load_unload_disabled(self):
method test_dynamic_model_modify (line 1045) | def test_dynamic_model_modify(self):
method test_dynamic_file_delete (line 1172) | def test_dynamic_file_delete(self):
method test_multiple_model_repository_polling (line 1267) | def test_multiple_model_repository_polling(self):
method test_multiple_model_repository_control (line 1312) | def test_multiple_model_repository_control(self):
method test_model_control (line 1419) | def test_model_control(self):
method test_model_control_fail (line 1604) | def test_model_control_fail(self):
method test_model_control_ensemble (line 1640) | def test_model_control_ensemble(self):
method test_load_same_model_different_platform (line 1740) | def test_load_same_model_different_platform(self):
method test_model_availability_on_reload (line 1810) | def test_model_availability_on_reload(self):
method test_model_availability_on_reload_2 (line 1869) | def test_model_availability_on_reload_2(self):
method test_model_availability_on_reload_3 (line 1928) | def test_model_availability_on_reload_3(self):
method test_model_reload_fail (line 1985) | def test_model_reload_fail(self):
method test_multiple_model_repository_control_startup_models (line 2031) | def test_multiple_model_repository_control_startup_models(self):
method test_model_repository_index (line 2256) | def test_model_repository_index(self):
method test_config_override (line 2327) | def test_config_override(self):
method test_file_override (line 2402) | def test_file_override(self):
method test_file_override_security (line 2557) | def test_file_override_security(self):
method test_shutdown_dynamic (line 2632) | def test_shutdown_dynamic(self):
method test_shutdown_sequence (line 2692) | def test_shutdown_sequence(self):
method test_shutdown_ensemble (line 2778) | def test_shutdown_ensemble(self):
method test_load_gpu_limit (line 2837) | def test_load_gpu_limit(self):
method test_concurrent_model_load_speedup (line 2868) | def test_concurrent_model_load_speedup(self):
method test_concurrent_model_load (line 2914) | def test_concurrent_model_load(self):
method test_concurrent_model_load_unload (line 2943) | def test_concurrent_model_load_unload(self):
method test_concurrent_same_model_load_unload_stress (line 3005) | def test_concurrent_same_model_load_unload_stress(self):
method test_concurrent_model_instance_load_speedup (line 3087) | def test_concurrent_model_instance_load_speedup(self):
method _call_with_timeout (line 3126) | def _call_with_timeout(self, callable, timeout_secs):
method _call_with_expected_timeout (line 3136) | def _call_with_expected_timeout(self, callable, timeout_secs=3):
method _get_fp32_io (line 3148) | def _get_fp32_io(self, client_type):
method test_concurrent_model_instance_load_sanity (line 3168) | def test_concurrent_model_instance_load_sanity(self):
method test_model_config_overwite (line 3270) | def test_model_config_overwite(self):
method test_shutdown_while_background_unloading (line 3329) | def test_shutdown_while_background_unloading(self):
method test_shutdown_while_loading (line 3350) | def test_shutdown_while_loading(self):
method test_shutdown_with_live_connection (line 3362) | def test_shutdown_with_live_connection(self):
method test_add_custom_config (line 3404) | def test_add_custom_config(self):
method test_delete_custom_config (line 3450) | def test_delete_custom_config(self):
method test_load_new_model_version (line 3492) | def test_load_new_model_version(self):
FILE: qa/L0_lifecycle/retry_model/1/model.py
class TritonPythonModel (line 33) | class TritonPythonModel:
method auto_complete_config (line 35) | def auto_complete_config(auto_complete_model_config):
method initialize (line 49) | def initialize(self, args):
method finalize (line 72) | def finalize(self):
method execute (line 76) | def execute(self, requests):
FILE: qa/L0_logging/log_format_test.py
class LogInjectionError (line 173) | class LogInjectionError(Exception):
function parse_timestamp (line 177) | def parse_timestamp(timestamp):
function validator (line 188) | def validator(func):
function validate_level (line 194) | def validate_level(level, _):
function validate_month (line 199) | def validate_month(month, _):
function validate_day (line 206) | def validate_day(day, _):
function validate_ISO8601_timestamp (line 213) | def validate_ISO8601_timestamp(timestamp, _):
function validate_timestamp (line 218) | def validate_timestamp(timestamp, _):
function validate_pid (line 223) | def validate_pid(pid, _):
function validate_file (line 228) | def validate_file(file_, _):
function validate_line (line 233) | def validate_line(line, _):
function split_row (line 237) | def split_row(row):
function validate_protobuf (line 241) | def validate_protobuf(protobuf):
function validate_table (line 250) | def validate_table(table_rows):
function validate_message (line 282) | def validate_message(message, escaped):
class TestLogFormat (line 353) | class TestLogFormat:
method _setup (line 355) | def _setup(self, request):
method _shutdown_server (line 369) | def _shutdown_server(self):
method _launch_server (line 374) | def _launch_server(self, escaped=None):
method _validate_log_record (line 411) | def _validate_log_record(self, record, format_regex, escaped):
method _parse_log_file (line 425) | def _parse_log_file(self, file_path, format_regex):
method _validate_log_file (line 443) | def _validate_log_file(self, file_path, format_regex, escaped):
method _detect_injection (line 448) | def _detect_injection(self, log_records, injected_record):
method test_format (line 460) | def test_format(self, log_format, format_regex):
method test_injection (line 474) | def test_injection(self, log_format, format_regex, injected_record):
FILE: qa/L0_logging/logging_endpoint_test.py
class LogEndpointTest (line 45) | class LogEndpointTest(tu.TestResultCollector):
method tearDown (line 46) | def tearDown(self):
method check_server_initial_state (line 62) | def check_server_initial_state(self):
method test_http_get_settings (line 79) | def test_http_get_settings(self):
method test_grpc_get_settings (line 97) | def test_grpc_get_settings(self):
method test_http_update_settings (line 123) | def test_http_update_settings(self):
method test_grpc_update_settings (line 222) | def test_grpc_update_settings(self):
FILE: qa/L0_long_running_stress/crashing_client.py
function crashing_client (line 43) | def crashing_client(
FILE: qa/L0_long_running_stress/scenarios.py
class TimeoutException (line 62) | class TimeoutException(Exception):
function completion_callback (line 67) | def completion_callback(user_data, result, error):
class Scenario (line 72) | class Scenario(metaclass=abc.ABCMeta):
method __init__ (line 73) | def __init__(self, name, trials, verbose=False, out_stream=sys.stdout):
method scenario_name (line 79) | def scenario_name(self):
method get_trial (line 82) | def get_trial(self):
method get_datatype (line 85) | def get_datatype(self, trial):
method run (line 96) | def run(self, client_metadata):
class PerfAnalyzerScenario (line 100) | class PerfAnalyzerScenario(Scenario):
class ModelOption (line 105) | class ModelOption:
method __init__ (line 113) | def __init__(
method run (line 129) | def run(self, name, sequence_id_range, out_stream):
method __init__ (line 184) | def __init__(
method generate_sequence_data (line 266) | def generate_sequence_data(self, trial, dtype, data_filename):
method generate_identity_data (line 304) | def generate_identity_data(self, trial, dtype, data_filename):
method run (line 329) | def run(self, client_metadata):
class ResNetScenario (line 334) | class ResNetScenario(Scenario):
method __init__ (line 335) | def __init__(self, name, batch_size=32, verbose=False, out_stream=sys....
method preprocess (line 346) | def preprocess(self, filename):
method postprocess (line 355) | def postprocess(self, results):
method run (line 377) | def run(self, client_metadata):
class TimeoutScenario (line 393) | class TimeoutScenario(Scenario):
method __init__ (line 394) | def __init__(
method run (line 407) | def run(self, client_metadata):
class CrashingScenario (line 439) | class CrashingScenario(Scenario):
method __init__ (line 440) | def __init__(self, name, verbose=False, out_stream=sys.stdout):
method run (line 443) | def run(self, client_metadata):
method parse_result (line 459) | def parse_result(self, log):
class SequenceScenario (line 479) | class SequenceScenario(Scenario):
class UserData (line 480) | class UserData:
method __init__ (line 481) | def __init__(self):
method check_constraints (line 488) | def check_constraints(self, model_name, sequence_id):
method __init__ (line 491) | def __init__(
method get_expected_result (line 504) | def get_expected_result(self, expected_result, value, trial, flag_str=...
method check_sequence_async (line 519) | def check_sequence_async(
class SequenceNoEndScenario (line 646) | class SequenceNoEndScenario(SequenceScenario):
method __init__ (line 647) | def __init__(
method check_constraints (line 658) | def check_constraints(self, model_name, sequence_id):
method run (line 662) | def run(
class SequenceValidNoEndScenario (line 715) | class SequenceValidNoEndScenario(SequenceScenario):
method __init__ (line 716) | def __init__(
method check_constraints (line 727) | def check_constraints(self, model_name, sequence_id):
method run (line 731) | def run(
class SequenceValidValidScenario (line 795) | class SequenceValidValidScenario(SequenceScenario):
method __init__ (line 796) | def __init__(
method check_constraints (line 807) | def check_constraints(self, model_name, sequence_id):
method run (line 811) | def run(
class SequenceNoStartScenario (line 875) | class SequenceNoStartScenario(SequenceScenario):
method __init__ (line 876) | def __init__(
method check_constraints (line 887) | def check_constraints(self, model_name, sequence_id):
method run (line 898) | def run(self, client_metadata):
class SequenceValidScenario (line 947) | class SequenceValidScenario(SequenceScenario):
method __init__ (line 948) | def __init__(
method check_constraints (line 959) | def check_constraints(self, model_name, sequence_id):
method run (line 963) | def run(
FILE: qa/L0_long_running_stress/stress.py
function get_trials (line 65) | def get_trials(is_sequence=True):
function update_test_count (line 80) | def update_test_count(
class ScenarioSelector (line 108) | class ScenarioSelector:
method __init__ (line 109) | def __init__(self, probs, rng):
method get_scenario (line 124) | def get_scenario(self):
function stress_thread (line 128) | def stress_thread(
function load_thread (line 304) | def load_thread(
function format_content (line 370) | def format_content(content, max_line_length):
function accumulate_count (line 389) | def accumulate_count(dict_list, test_case_name):
function generate_report (line 398) | def generate_report(
FILE: qa/L0_memory/client.py
class UserData (line 40) | class UserData:
method __init__ (line 41) | def __init__(self):
function callback (line 45) | def callback(user_data, result, error):
class TestTritonInference (line 52) | class TestTritonInference(unittest.TestCase):
method setUp (line 53) | def setUp(self):
method tearDown (line 56) | def tearDown(self):
method test_inference (line 59) | def test_inference(self):
FILE: qa/L0_metrics/cpu_metrics_test.py
function get_metrics (line 45) | def get_metrics():
class TestCpuMetrics (line 65) | class TestCpuMetrics(unittest.TestCase):
method setUp (line 66) | def setUp(self):
method _validate_metric_variance (line 83) | def _validate_metric_variance(self, observed_metrics: dict):
method _collect_metrics (line 124) | def _collect_metrics(self, observed_metrics, interval_secs=1):
method test_cpu_metrics_during_inference (line 139) | def test_cpu_metrics_during_inference(self):
method test_cpu_metrics_ranges (line 175) | def test_cpu_metrics_ranges(self):
FILE: qa/L0_metrics/ensemble_decoupled/async_execute_decouple/1/model.py
class TritonPythonModel (line 33) | class TritonPythonModel:
method execute (line 34) | async def execute(self, requests):
FILE: qa/L0_metrics/histogram_metrics_test.py
function get_histogram_metric_key (line 46) | def get_histogram_metric_key(
class TestHistogramMetrics (line 57) | class TestHistogramMetrics(tu.TestResultCollector):
method setUp (line 58) | def setUp(self):
method get_metrics (line 61) | def get_metrics(self):
method get_histogram_metrics (line 66) | def get_histogram_metrics(self, metric_family: str):
method async_stream_infer (line 82) | def async_stream_infer(self, model_name, inputs, outputs, responses_pe...
method test_ensemble_decoupled (line 114) | def test_ensemble_decoupled(self):
method test_buckets_override (line 182) | def test_buckets_override(self):
FILE: qa/L0_metrics/metrics_config_test.py
class MetricsConfigTest (line 68) | class MetricsConfigTest(tu.TestResultCollector):
method _get_metrics (line 69) | def _get_metrics(self):
method test_pinned_memory_metrics_exist (line 75) | def test_pinned_memory_metrics_exist(self):
method test_inf_counters_exist (line 81) | def test_inf_counters_exist(self):
method test_inf_counters_missing (line 86) | def test_inf_counters_missing(self):
method test_cache_counters_exist (line 91) | def test_cache_counters_exist(self):
method test_cache_counters_missing (line 96) | def test_cache_counters_missing(self):
method test_inf_histograms_exist (line 102) | def test_inf_histograms_exist(self):
method test_inf_histograms_missing (line 108) | def test_inf_histograms_missing(self):
method test_inf_summaries_exist (line 114) | def test_inf_summaries_exist(self):
method test_inf_summaries_missing (line 119) | def test_inf_summaries_missing(self):
method test_cache_summaries_exist (line 124) | def test_cache_summaries_exist(self):
method test_cache_summaries_missing (line 129) | def test_cache_summaries_missing(self):
method test_summaries_custom_quantiles (line 134) | def test_summaries_custom_quantiles(self):
method test_inf_summaries_exist_with_cache (line 147) | def test_inf_summaries_exist_with_cache(self):
method test_model_namespacing_label_with_namespace_on (line 156) | def test_model_namespacing_label_with_namespace_on(self):
method test_model_namespacing_label_with_namespace_off (line 166) | def test_model_namespacing_label_with_namespace_off(self):
FILE: qa/L0_metrics/metrics_queue_size_test.py
class MetricsPendingRequestCountTest (line 53) | class MetricsPendingRequestCountTest(tu.TestResultCollector):
method setUp (line 54) | def setUp(self):
method _validate_model_config (line 92) | def _validate_model_config(self, model_name, max_queue_size=0):
method _get_metrics (line 109) | def _get_metrics(self):
method _get_metric_line (line 114) | def _get_metric_line(self, metric, metrics):
method _get_metric_value (line 120) | def _get_metric_value(self, metric):
method _assert_metric_equals (line 130) | def _assert_metric_equals(self, metric, expected_value):
method _assert_metric_greater_than (line 134) | def _assert_metric_greater_than(self, metric, gt_value):
method _send_async_requests (line 138) | def _send_async_requests(self, model_name, inputs, futures):
method _send_async_requests_sequence (line 142) | def _send_async_requests_sequence(self, num_seq_slots, model_name, inp...
method _test_helper (line 164) | def _test_helper(
method test_default_scheduler (line 215) | def test_default_scheduler(self):
method test_dynamic_batch_scheduler (line 221) | def test_dynamic_batch_scheduler(self):
method test_fail_max_queue_size (line 227) | def test_fail_max_queue_size(self):
method test_sequence_batch_scheduler_direct (line 238) | def test_sequence_batch_scheduler_direct(self):
method test_sequence_batch_scheduler_oldest (line 247) | def test_sequence_batch_scheduler_oldest(self):
method test_ensemble_scheduler (line 255) | def test_ensemble_scheduler(self):
FILE: qa/L0_metrics/model_namespacing_repos/addsub_repo/composing_model/1/model.py
class TritonPythonModel (line 33) | class TritonPythonModel:
method auto_complete_config (line 37) | def auto_complete_config(auto_complete_model_config):
method initialize (line 81) | def initialize(self, args):
method execute (line 94) | def execute(self, requests):
method addsub (line 104) | def addsub(self, in_0, in_1):
FILE: qa/L0_metrics/model_namespacing_repos/subadd_repo/composing_model/1/model.py
class TritonPythonModel (line 33) | class TritonPythonModel:
method auto_complete_config (line 37) | def auto_complete_config(auto_complete_model_config):
method initialize (line 81) | def initialize(self, args):
method execute (line 94) | def execute(self, requests):
method subadd (line 104) | def subadd(self, in_0, in_1):
FILE: qa/L0_metrics/pinned_memory_metrics_test.py
function get_metrics (line 51) | def get_metrics():
class TestPinnedMemoryMetrics (line 67) | class TestPinnedMemoryMetrics(unittest.TestCase):
method setUp (line 68) | def setUp(self):
method _assert_pinned_memory_utilization (line 93) | def _assert_pinned_memory_utilization(self):
method _collect_metrics (line 98) | def _collect_metrics(self):
method test_pinned_memory_metrics_asynchronous_requests (line 105) | def test_pinned_memory_metrics_asynchronous_requests(self):
method test_pinned_memory_metrics_synchronous_requests (line 145) | def test_pinned_memory_metrics_synchronous_requests(self):
FILE: qa/L0_mlflow/plugin_test.py
class PluginTest (line 43) | class PluginTest(tu.TestResultCollector):
method setUp (line 44) | def setUp(self):
method _validate_deployment (line 47) | def _validate_deployment(self, model_name):
method test_onnx_flavor (line 82) | def test_onnx_flavor(self):
method test_onnx_flavor_with_files (line 93) | def test_onnx_flavor_with_files(self):
method test_model_name (line 117) | def test_model_name(self):
FILE: qa/L0_model_config/autofill_noplatform/python/conflicting_max_batch_size/model.py
class TritonPythonModel (line 28) | class TritonPythonModel:
method auto_complete_config (line 30) | def auto_complete_config(auto_complete_model_config):
method execute (line 44) | def execute(self, requests):
FILE: qa/L0_model_config/autofill_noplatform/python/conflicting_scheduler_sequence/model.py
class TritonPythonModel (line 28) | class TritonPythonModel:
method auto_complete_config (line 30) | def auto_complete_config(auto_complete_model_config):
method execute (line 45) | def execute(self, requests):
FILE: qa/L0_model_config/autofill_noplatform/python/input_missing_datatype/model.py
class TritonPythonModel (line 28) | class TritonPythonModel:
method auto_complete_config (line 30) | def auto_complete_config(auto_complete_model_config):
method execute (line 44) | def execute(self, requests):
FILE: qa/L0_model_config/autofill_noplatform/python/input_missing_dims/model.py
class TritonPythonModel (line 28) | class TritonPythonModel:
method auto_complete_config (line 30) | def auto_complete_config(auto_complete_model_config):
method execute (line 44) | def execute(self, requests):
FILE: qa/L0_model_config/autofill_noplatform/python/input_missing_name/model.py
class TritonPythonModel (line 28) | class TritonPythonModel:
method auto_complete_config (line 30) | def auto_complete_config(auto_complete_model_config):
method execute (line 44) | def execute(self, requests):
FILE: qa/L0_model_config/autofill_noplatform/python/input_wrong_property/model.py
class TritonPythonModel (line 28) | class TritonPythonModel:
method auto_complete_config (line 30) | def auto_complete_config(auto_complete_model_config):
method execute (line 49) | def execute(self, requests):
FILE: qa/L0_model_config/autofill_noplatform/python/model_transaction_policy_invalid_args/model.py
class TritonPythonModel (line 28) | class TritonPythonModel:
method auto_complete_config (line 30) | def auto_complete_config(auto_complete_model_config):
method execute (line 46) | def execute(self, requests):
FILE: qa/L0_model_config/autofill_noplatform/python/model_transaction_policy_mismatch/model.py
class TritonPythonModel (line 28) | class TritonPythonModel:
method auto_complete_config (line 30) | def auto_complete_config(auto_complete_model_config):
method execute (line 45) | def execute(self, requests):
FILE: qa/L0_model_config/autofill_noplatform/python/no_return/model.py
class TritonPythonModel (line 28) | class TritonPythonModel:
method auto_complete_config (line 30) | def auto_complete_config(auto_complete_model_config):
method execute (line 42) | def execute(self, requests):
FILE: qa/L0_model_config/autofill_noplatform/python/output_missing_datatype/model.py
class TritonPythonModel (line 28) | class TritonPythonModel:
method auto_complete_config (line 30) | def auto_complete_config(auto_complete_model_config):
method execute (line 44) | def execute(self, requests):
FILE: qa/L0_model_config/autofill_noplatform/python/output_missing_dims/model.py
class TritonPythonModel (line 28) | class TritonPythonModel:
method auto_complete_config (line 30) | def auto_complete_config(auto_complete_model_config):
method execute (line 44) | def execute(self, requests):
FILE: qa/L0_model_config/autofill_noplatform/python/output_missing_name/model.py
class TritonPythonModel (line 28) | class TritonPythonModel:
method auto_complete_config (line 30) | def auto_complete_config(auto_complete_model_config):
method execute (line 44) | def execute(self, requests):
FILE: qa/L0_model_config/autofill_noplatform/python/output_wrong_property/model.py
class TritonPythonModel (line 28) | class TritonPythonModel:
method auto_complete_config (line 30) | def auto_complete_config(auto_complete_model_config):
method execute (line 49) | def execute(self, requests):
FILE: qa/L0_model_config/autofill_noplatform_success/python/conflicting_scheduler_ensemble/conflicting_scheduler_ensemble/model.py
class TritonPythonModel (line 28) | class TritonPythonModel:
method auto_complete_config (line 30) | def auto_complete_config(auto_complete_model_config):
method execute (line 41) | def execute(self, requests):
FILE: qa/L0_model_config/autofill_noplatform_success/python/conflicting_scheduler_ensemble/ensemble_first_step/model.py
class TritonPythonModel (line 28) | class TritonPythonModel:
method auto_complete_config (line 30) | def auto_complete_config(auto_complete_model_config):
method execute (line 41) | def execute(self, requests):
FILE: qa/L0_model_config/autofill_noplatform_success/python/conflicting_scheduler_ensemble/ensemble_second_step/model.py
class TritonPythonModel (line 28) | class TritonPythonModel:
method auto_complete_config (line 30) | def auto_complete_config(auto_complete_model_config):
method execute (line 41) | def execute(self, requests):
FILE: qa/L0_model_config/autofill_noplatform_success/python/dynamic_batching/model.py
class TritonPythonModel (line 28) | class TritonPythonModel:
method auto_complete_config (line 30) | def auto_complete_config(auto_complete_model_config):
method execute (line 45) | def execute(self, requests):
FILE: qa/L0_model_config/autofill_noplatform_success/python/dynamic_batching_no_op/model.py
class TritonPythonModel (line 28) | class TritonPythonModel:
method auto_complete_config (line 30) | def auto_complete_config(auto_complete_model_config):
method execute (line 45) | def execute(self, requests):
FILE: qa/L0_model_config/autofill_noplatform_success/python/incomplete_input/model.py
class TritonPythonModel (line 28) | class TritonPythonModel:
method auto_complete_config (line 30) | def auto_complete_config(auto_complete_model_config):
method execute (line 42) | def execute(self, requests):
FILE: qa/L0_model_config/autofill_noplatform_success/python/model_transaction_policy/model.py
class TritonPythonModel (line 28) | class TritonPythonModel:
method auto_complete_config (line 30) | def auto_complete_config(auto_complete_model_config):
method execute (line 45) | def execute(self, requests):
FILE: qa/L0_model_config/autofill_noplatform_success/python/model_transaction_policy_decoupled_false/model.py
class TritonPythonModel (line 28) | class TritonPythonModel:
method auto_complete_config (line 30) | def auto_complete_config(auto_complete_model_config):
method execute (line 45) | def execute(self, requests):
FILE: qa/L0_model_config/autofill_noplatform_success/python/model_transaction_policy_no_op/model.py
class TritonPythonModel (line 28) | class TritonPythonModel:
method auto_complete_config (line 30) | def auto_complete_config(auto_complete_model_config):
method execute (line 45) | def execute(self, requests):
FILE: qa/L0_model_config/autofill_noplatform_success/python/optional_input/model.py
class TritonPythonModel (line 28) | class TritonPythonModel:
method auto_complete_config (line 30) | def auto_complete_config(auto_complete_model_config):
method execute (line 47) | def execute(self, requests):
FILE: qa/L0_model_namespacing/python_addsub/__init__.py
class TritonPythonModel (line 35) | class TritonPythonModel:
method auto_complete_config (line 39) | def auto_complete_config(auto_complete_model_config):
method initialize (line 83) | def initialize(self, args):
method execute (line 96) | def execute(self, requests):
method addsub (line 106) | def addsub(self, in_0, in_1):
FILE: qa/L0_model_namespacing/python_subadd/__init__.py
class TritonPythonModel (line 35) | class TritonPythonModel:
method auto_complete_config (line 39) | def auto_complete_config(auto_complete_model_config):
method initialize (line 83) | def initialize(self, args):
method execute (line 96) | def execute(self, requests):
method subadd (line 106) | def subadd(self, in_0, in_1):
FILE: qa/L0_model_namespacing/test.py
class AddSubChecker (line 51) | class AddSubChecker:
method __init__ (line 55) | def __init__(self, checker_client=None):
method infer (line 78) | def infer(self, model):
class SubAddChecker (line 92) | class SubAddChecker(AddSubChecker):
method infer (line 93) | def infer(self, model):
class ModelNamespacePoll (line 108) | class ModelNamespacePoll(tu.TestResultCollector):
method setUp (line 109) | def setUp(self):
method check_health (line 115) | def check_health(self, expect_live=True, expect_ready=True):
method test_no_duplication (line 119) | def test_no_duplication(self):
method test_duplication (line 131) | def test_duplication(self):
method test_ensemble_duplication (line 156) | def test_ensemble_duplication(self):
method test_dynamic_resolution (line 182) | def test_dynamic_resolution(self):
class ModelNamespaceExplicit (line 226) | class ModelNamespaceExplicit(tu.TestResultCollector):
method setUp (line 227) | def setUp(self):
method check_health (line 233) | def check_health(self, expect_live=True, expect_ready=True):
method test_no_duplication (line 237) | def test_no_duplication(self):
method test_duplication (line 252) | def test_duplication(self):
method test_ensemble_duplication (line 280) | def test_ensemble_duplication(self):
method test_dynamic_resolution (line 309) | def test_dynamic_resolution(self):
FILE: qa/L0_model_queue/model_queue_test.py
class ModelQueueTest (line 52) | class ModelQueueTest(tu.TestResultCollector):
method setUp (line 53) | def setUp(self):
method add_deferred_exception (line 61) | def add_deferred_exception(self, ex):
method check_deferred_exception (line 66) | def check_deferred_exception(self):
method _get_metrics (line 74) | def _get_metrics(self):
method _metrics_before_test (line 80) | def _metrics_before_test(self, model, reason):
method _assert_metrics (line 89) | def _assert_metrics(
method check_response (line 97) | def check_response(
method test_max_queue_size (line 158) | def test_max_queue_size(self):
method test_policy_delay (line 214) | def test_policy_delay(self):
method test_policy_reject (line 258) | def test_policy_reject(self):
method test_timeout_override (line 335) | def test_timeout_override(self):
method test_priority_levels (line 490) | def test_priority_levels(self):
method test_max_priority_levels (line 535) | def test_max_priority_levels(self):
method test_priority_with_policy (line 581) | def test_priority_with_policy(self):
FILE: qa/L0_model_update/instance_update_test.py
class TestInstanceUpdate (line 51) | class TestInstanceUpdate(unittest.TestCase):
method setUp (line 54) | def setUp(self):
method tearDown (line 68) | def tearDown(self):
method _get_inputs (line 79) | def _get_inputs(self, batching=False):
method _infer (line 89) | def _infer(self, batching=False):
method _concurrent_infer (line 92) | def _concurrent_infer(self, concurrency=4, batching=False):
method _check_count (line 109) | def _check_count(self, kind, expected_count, poll=False):
method _load_model (line 121) | def _load_model(self, instance_count, instance_config="", batching=Fal...
method _update_instance_count (line 129) | def _update_instance_count(
method _unload_model (line 156) | def _unload_model(self, batching=False):
method test_add_rm_add_instance_no_batching (line 165) | def test_add_rm_add_instance_no_batching(self):
method test_add_rm_add_instance_with_batching (line 175) | def test_add_rm_add_instance_with_batching(self):
method test_rm_add_rm_instance_no_batching (line 185) | def test_rm_add_rm_instance_no_batching(self):
method test_rm_add_rm_instance_with_batching (line 195) | def test_rm_add_rm_instance_with_batching(self):
method test_rm_instance_to_zero (line 205) | def test_rm_instance_to_zero(self):
method test_cpu_instance_update (line 213) | def test_cpu_instance_update(self):
method test_gpu_instance_update (line 224) | def test_gpu_instance_update(self):
method test_gpu_cpu_instance_update (line 231) | def test_gpu_cpu_instance_update(self):
method test_instance_name_update (line 253) | def test_instance_name_update(self):
method test_instance_signature (line 269) | def test_instance_signature(self):
method test_invalid_config (line 299) | def test_invalid_config(self):
method test_model_file_update (line 312) | def test_model_file_update(self):
method test_non_instance_config_update (line 321) | def test_non_instance_config_update(self):
method test_load_api_with_config (line 334) | def test_load_api_with_config(self):
method test_update_while_inferencing (line 359) | def test_update_while_inferencing(self):
method test_infer_while_updating (line 388) | def test_infer_while_updating(self):
method test_instance_resource_increase (line 421) | def test_instance_resource_increase(self):
method test_instance_resource_increase_above_explicit (line 456) | def test_instance_resource_increase_above_explicit(self):
method test_instance_resource_decrease (line 483) | def test_instance_resource_decrease(self):
method test_direct_scheduler_update_no_ongoing_sequences (line 525) | def test_direct_scheduler_update_no_ongoing_sequences(self):
method test_direct_scheduler_update_with_ongoing_sequences (line 531) | def test_direct_scheduler_update_with_ongoing_sequences(self):
method test_oldest_scheduler_update_no_ongoing_sequences (line 537) | def test_oldest_scheduler_update_no_ongoing_sequences(self):
method test_oldest_scheduler_update_with_ongoing_sequences (line 543) | def test_oldest_scheduler_update_with_ongoing_sequences(self):
method _test_scheduler_update_no_ongoing_sequences (line 550) | def _test_scheduler_update_no_ongoing_sequences(self, sequence_batchin...
method _test_scheduler_update_with_ongoing_sequences (line 598) | def _test_scheduler_update_with_ongoing_sequences(self, sequence_batch...
FILE: qa/L0_nan_inf/models/nan_inf_output/1/model.py
class TritonPythonModel (line 33) | class TritonPythonModel:
method initialize (line 34) | def initialize(self, args):
method execute (line 37) | def execute(self, requests):
FILE: qa/L0_nan_inf/nan_inf_test.py
class NanInfTest (line 44) | class NanInfTest(tu.TestResultCollector):
method test_http_raw (line 48) | def test_http_raw(self):
method test_http (line 68) | def test_http(self):
method test_grpc (line 74) | def test_grpc(self):
method infer_helper (line 80) | def infer_helper(self, triton_client, inputs):
FILE: qa/L0_openai/generate_engine.py
function generate_model_engine (line 34) | def generate_model_engine(model: str, engines_path: str):
FILE: qa/L0_optional_input/models/optional_identity/1/model.py
class TritonPythonModel (line 30) | class TritonPythonModel:
method execute (line 31) | def execute(self, requests):
FILE: qa/L0_optional_input/optional_input_test.py
class OptionalInputTest (line 47) | class OptionalInputTest(tu.TestResultCollector):
method setUp (line 48) | def setUp(self):
method add_deferred_exception (line 74) | def add_deferred_exception(self, ex):
method check_deferred_exception (line 79) | def check_deferred_exception(self):
method check_response (line 85) | def check_response(self, thresholds, provided_inputs=("INPUT0", "INPUT...
method check_status (line 136) | def check_status(self, model_name, batch_exec, request_cnt, infer_cnt):
method test_all_inputs (line 207) | def test_all_inputs(self):
method test_optional_same_input (line 227) | def test_optional_same_input(self):
method test_optional_mix_inputs (line 256) | def test_optional_mix_inputs(self):
method test_optional_mix_inputs_2 (line 303) | def test_optional_mix_inputs_2(self):
method test_ensemble_all_inputs (line 342) | def test_ensemble_all_inputs(self):
method test_ensemble_optional_same_input (line 349) | def test_ensemble_optional_same_input(self):
method test_ensemble_optional_mix_inputs (line 356) | def test_ensemble_optional_mix_inputs(self):
method test_ensemble_optional_mix_inputs_2 (line 363) | def test_ensemble_optional_mix_inputs_2(self):
method test_ensemble_optional_pipeline (line 370) | def test_ensemble_optional_pipeline(self):
method test_ensemble_optional_connecting_tensor (line 409) | def test_ensemble_optional_connecting_tensor(self):
FILE: qa/L0_orca/orca_http_test.py
function get_endpoint_header (line 41) | def get_endpoint_header(url, data, request_header=None):
function parse_header_data (line 60) | def parse_header_data(header, orca_format):
function check_for_keys (line 95) | def check_for_keys(data, desired_keys, orca_format):
function request_header (line 109) | def request_header(orca_format):
function test_header_type (line 113) | def test_header_type(url, data, orca_format):
FILE: qa/L0_output_name/output_name_test.py
class OutputNameValidationTest (line 42) | class OutputNameValidationTest(tu.TestResultCollector):
method requestGenerator (line 43) | def requestGenerator(self, model_name, output_name):
method test_grpc (line 63) | def test_grpc(self):
FILE: qa/L0_output_validation/lt_op_val_client.py
class OutputValidationTest (line 39) | class OutputValidationTest(tu.TestResultCollector):
method test_datatype (line 41) | def test_datatype(self):
method test_index (line 53) | def test_index(self):
method test_success (line 65) | def test_success(self):
FILE: qa/L0_parallel_copy/parallel_copy_test.py
class ParallelCopyTest (line 44) | class ParallelCopyTest(tu.TestResultCollector):
method setUp (line 45) | def setUp(self):
method _batch_input_duration (line 50) | def _batch_input_duration(self, batch_size):
method _run (line 72) | def _run(self, batch_sizes):
method test_performance (line 140) | def test_performance(self):
FILE: qa/L0_parameters/class_count_test.py
class ClassificationParameterTest (line 43) | class ClassificationParameterTest(tu.TestResultCollector):
method setUp (line 44) | def setUp(self):
method _prepare_io (line 51) | def _prepare_io(self, input_data, dtype):
method test_classificattion (line 61) | def test_classificattion(self):
method test_classificattion_unsupported_data_type (line 88) | def test_classificattion_unsupported_data_type(self):
method test_classification_output_tensor_too_large (line 103) | def test_classification_output_tensor_too_large(self):
FILE: qa/L0_parameters/model_repository/parameter/1/model.py
class TritonPythonModel (line 33) | class TritonPythonModel:
method auto_complete_config (line 35) | def auto_complete_config(auto_complete_model_config):
method execute (line 60) | def execute(self, requests):
FILE: qa/L0_parameters/parameters_test.py
class InferenceParametersTest (line 49) | class InferenceParametersTest(IsolatedAsyncioTestCase):
method asyncSetUp (line 50) | async def asyncSetUp(self):
method create_inputs (line 94) | def create_inputs(self, client_type):
method send_request_and_verify (line 102) | async def send_request_and_verify(
method verify_outputs (line 138) | def verify_outputs(self, result, parameters):
method test_grpc_parameter (line 153) | async def test_grpc_parameter(self):
method test_http_parameter (line 156) | async def test_http_parameter(self):
method test_async_http_parameter (line 159) | async def test_async_http_parameter(self):
method test_async_grpc_parameter (line 164) | async def test_async_grpc_parameter(self):
method test_http_async_parameter (line 169) | def test_http_async_parameter(self):
method test_grpc_async_parameter (line 182) | def test_grpc_async_parameter(self):
method test_grpc_stream_parameter (line 199) | def test_grpc_stream_parameter(self):
method test_ensemble_parameter_forwarding (line 217) | async def test_ensemble_parameter_forwarding(self):
method asyncTearDown (line 220) | async def asyncTearDown(self):
FILE: qa/L0_passive_instance/passive_instance_test.py
class PassiveInstanceTest (line 40) | class PassiveInstanceTest(tu.TestResultCollector):
method test_inference (line 41) | def test_inference(self):
FILE: qa/L0_perf_pyclients/simple_perf_client.py
function parse_model_grpc (line 40) | def parse_model_grpc(model_metadata, model_config):
function parse_model_http (line 101) | def parse_model_http(model_metadata, model_config):
function requestGenerator (line 168) | def requestGenerator(input_name, input_data, output_name, dtype, protocol):
FILE: qa/L0_python_api/test_kserve.py
class TestHttpOptions (line 40) | class TestHttpOptions:
method test_correct_http_parameters (line 41) | def test_correct_http_parameters(self):
method test_wrong_http_parameters (line 46) | def test_wrong_http_parameters(self):
class TestGrpcOptions (line 58) | class TestGrpcOptions:
method test_correct_grpc_parameters (line 59) | def test_correct_grpc_parameters(self):
method test_wrong_grpc_parameters (line 67) | def test_wrong_grpc_parameters(self):
class TestMetricsOptions (line 95) | class TestMetricsOptions:
method test_correct_http_parameters (line 96) | def test_correct_http_parameters(self):
method test_wrong_http_parameters (line 99) | def test_wrong_http_parameters(self):
class TestKServe (line 116) | class TestKServe:
method test_server_ready (line 119) | def test_server_ready(self, frontend, client_type, url):
method test_service_double_start (line 132) | def test_service_double_start(self, frontend):
method test_invalid_options (line 147) | def test_invalid_options(self, frontend):
method test_server_service_order (line 161) | def test_server_service_order(self, frontend):
method test_service_custom_port (line 170) | def test_service_custom_port(self, frontend, client_type):
method test_inference (line 185) | def test_inference(self, frontend, client_type, url):
method test_streaming_inference (line 197) | def test_streaming_inference(self, frontend, client_type, url):
method test_http_generate_inference (line 208) | def test_http_generate_inference(self, frontend, client_type, url):
method test_http_req_during_shutdown (line 219) | def test_http_req_during_shutdown(self, frontend, client_type, url):
method test_grpc_req_during_shutdown (line 261) | def test_grpc_req_during_shutdown(self, frontend, client_type, url):
method test_metrics_default_port (line 317) | def test_metrics_default_port(self, frontend, url):
method test_metrics_custom_port (line 331) | def test_metrics_custom_port(self, frontend, port=8005):
method test_metrics_update (line 345) | def test_metrics_update(self, frontend, url):
FILE: qa/L0_python_api/test_model_repository/delayed_identity/1/model.py
class TritonPythonModel (line 32) | class TritonPythonModel:
method execute (line 33) | def execute(self, requests):
FILE: qa/L0_python_api/test_model_repository/identity/1/model.py
class TritonPythonModel (line 31) | class TritonPythonModel:
method initialize (line 36) | def initialize(self, args):
method execute (line 40) | def execute(self, requests):
FILE: qa/L0_python_api/testing_utils.py
function setup_server (line 38) | def setup_server(model_repository="test_model_repository") -> tritonserv...
function teardown_server (line 57) | def teardown_server(server: tritonserver.Server) -> None:
function setup_service (line 61) | def setup_service(
function teardown_service (line 74) | def teardown_service(service: Union[KServeHttp, KServeGrpc]) -> None:
function setup_client (line 78) | def setup_client(
function teardown_client (line 87) | def teardown_client(
function send_and_test_inference_identity (line 96) | def send_and_test_inference_identity(
function send_and_test_stream_inference (line 126) | def send_and_test_stream_inference(
function send_and_test_generate_inference (line 166) | def send_and_test_generate_inference() -> bool:
function get_metrics (line 189) | def get_metrics(metrics_url: str, model_name: str = "identity") -> Tuple...
function _extract_inference_count (line 203) | def _extract_inference_count(metrics_data: str, model_name: str):
FILE: qa/L0_pytorch_python_runtime/infer.py
function infer_model_without_parameter_file (line 38) | def infer_model_without_parameter_file():
function infer_model_with_parameter_file (line 90) | def infer_model_with_parameter_file(batch_size, data_offset=0):
function parallel_infer_a_full_dynamic_batch (line 127) | def parallel_infer_a_full_dynamic_batch(max_batch_size):
FILE: qa/L0_pytorch_python_runtime/unit_test.py
class PyTorchPythonBackendRuntimeUnittest (line 40) | class PyTorchPythonBackendRuntimeUnittest(unittest.TestCase):
method test_gather_torch_tensors (line 108) | def test_gather_torch_tensors(self):
method test_scatter_torch_tensors (line 132) | def test_scatter_torch_tensors(self):
FILE: qa/L0_query/query_e2e.py
class QueryTest (line 42) | class QueryTest(tu.TestResultCollector):
method test_http (line 43) | def test_http(self):
method test_http_shared_memory (line 56) | def test_http_shared_memory(self):
method test_http_out_of_shared_memory (line 96) | def test_http_out_of_shared_memory(self):
method test_grpc (line 137) | def test_grpc(self):
method test_grpc_shared_memory (line 150) | def test_grpc_shared_memory(self):
method test_grpc_out_of_shared_memory (line 186) | def test_grpc_out_of_shared_memory(self):
FILE: qa/L0_rate_limiter/rate_limiter_test.py
class AsyncGrpcRunner (line 51) | class AsyncGrpcRunner:
method __init__ (line 52) | def __init__(self, tester, server_url, model_name, delay_ms):
method _on_result (line 70) | def _on_result(self, result, error):
method req_loop (line 80) | def req_loop(self):
method start (line 148) | def start(self):
method _validate_run (line 151) | def _validate_run(self):
method join (line 165) | def join(self):
class RateLimiterTest (line 170) | class RateLimiterTest(su.SequenceBatcherTestUtil):
method stress_models (line 171) | def stress_models(self, model_names, delay_ms=0):
method test_single_model (line 191) | def test_single_model(self):
method test_cross_model_prioritization_limited_resource (line 200) | def test_cross_model_prioritization_limited_resource(self):
method test_cross_model_prioritization_plenty_resource (line 221) | def test_cross_model_prioritization_plenty_resource(self):
method test_single_model_dynamic_batching (line 242) | def test_single_model_dynamic_batching(self):
method test_single_model_sequence_batching (line 278) | def test_single_model_sequence_batching(self):
FILE: qa/L0_request_cancellation/grpc_cancellation_test.py
class UserData (line 43) | class UserData:
method __init__ (line 44) | def __init__(self):
function callback (line 48) | def callback(user_data, result, error):
class GrpcCancellationTest (line 55) | class GrpcCancellationTest(unittest.IsolatedAsyncioTestCase):
method setUp (line 60) | def setUp(self):
method tearDown (line 69) | def tearDown(self):
method _prepare_request (line 73) | def _prepare_request(self):
method _assert_max_duration (line 80) | def _assert_max_duration(self):
method _assert_callback_cancelled (line 89) | def _assert_callback_cancelled(self):
method test_grpc_async_infer (line 95) | def test_grpc_async_infer(self):
method test_grpc_stream_infer (line 107) | def test_grpc_stream_infer(self):
method test_aio_grpc_async_infer (line 116) | async def test_aio_grpc_async_infer(self):
method test_aio_grpc_stream_infer (line 127) | async def test_aio_grpc_stream_infer(self):
method test_grpc_async_infer_cancellation_at_step_start (line 142) | def test_grpc_async_infer_cancellation_at_step_start(self):
method test_grpc_async_infer_response_complete_during_cancellation (line 182) | def test_grpc_async_infer_response_complete_during_cancellation(self):
method test_grpc_async_infer_cancellation_before_finish_0 (line 205) | def test_grpc_async_infer_cancellation_before_finish_0(self):
method test_grpc_async_infer_cancellation_before_finish_1 (line 225) | def test_grpc_async_infer_cancellation_before_finish_1(self):
method test_grpc_async_infer_cancellation_before_response_complete_and_process_after_final_response (line 250) | def test_grpc_async_infer_cancellation_before_response_complete_and_pr...
FILE: qa/L0_request_cancellation/implicit_state_model/gen_model.py
class ImplicitStateModel (line 32) | class ImplicitStateModel(torch.nn.Module):
method __init__ (line 33) | def __init__(self):
method forward (line 36) | def forward(self, delay_itrs, seq_start, seq_id, seq_state_in):
FILE: qa/L0_request_cancellation/implicit_state_test.py
class TestImplicitState (line 37) | class TestImplicitState(unittest.TestCase):
method _get_inputs (line 38) | def _get_inputs(self, delay_itrs):
method _generate_streaming_callback_and_response_pair (line 44) | def _generate_streaming_callback_and_response_pair(self):
method _sequence_state_model_infer (line 52) | def _sequence_state_model_infer(self, num_reqs, seq_ids, delay_itrs, c...
method test_state_reset_after_cancel (line 72) | def test_state_reset_after_cancel(self):
FILE: qa/L0_request_cancellation/scheduler_test.py
class TestScheduler (line 40) | class TestScheduler(unittest.TestCase):
method setUp (line 41) | def setUp(self):
method _get_inputs (line 45) | def _get_inputs(self, batch_size):
method _generate_callback_and_response_pair (line 53) | def _generate_callback_and_response_pair(self):
method _assert_response_is_cancelled (line 63) | def _assert_response_is_cancelled(self, response):
method _generate_streaming_callback_and_response_pair (line 69) | def _generate_streaming_callback_and_response_pair(self):
method _assert_streaming_response_is_cancelled (line 77) | def _assert_streaming_response_is_cancelled(self, response):
method _get_metrics (line 89) | def _get_metrics(self):
method _metrics_before_test (line 95) | def _metrics_before_test(self, model, reason):
method _assert_metrics (line 104) | def _assert_metrics(
method test_dynamic_batch_scheduler_request_cancellation (line 113) | def test_dynamic_batch_scheduler_request_cancellation(self):
method test_sequence_batch_scheduler_backlog_request_cancellation (line 140) | def test_sequence_batch_scheduler_backlog_request_cancellation(self):
method test_direct_sequence_batch_scheduler_request_cancellation (line 187) | def test_direct_sequence_batch_scheduler_request_cancellation(self):
method test_oldest_sequence_batch_scheduler_request_cancellation (line 200) | def test_oldest_sequence_batch_scheduler_request_cancellation(self):
method _test_sequence_batch_scheduler_queued_request_cancellation (line 205) | def _test_sequence_batch_scheduler_queued_request_cancellation(self, m...
method test_ensemble_scheduler_request_cancellation (line 237) | def test_ensemble_scheduler_request_cancellation(self):
method test_scheduler_streaming_request_cancellation (line 250) | def test_scheduler_streaming_request_cancellation(self):
FILE: qa/L0_response_cache/ensemble_cache_test.py
class EnsembleCacheTest (line 46) | class EnsembleCacheTest(tu.TestResultCollector):
method setUp (line 47) | def setUp(self):
method _update_config (line 73) | def _update_config(self, config_file, config_pattern, config_to_add):
method _add_instance_group_cpu (line 82) | def _add_instance_group_cpu(self, config_file):
method _remove_config (line 91) | def _remove_config(self, config_file, config_to_remove):
method _reset_config_files (line 99) | def _reset_config_files(self):
method _run_ensemble (line 104) | def _run_ensemble(self):
method _get_model_statistics (line 118) | def _get_model_statistics(self, model):
method _run_inference_and_validate (line 132) | def _run_inference_and_validate(self, model):
method test_ensemble_top_level_response_cache (line 186) | def test_ensemble_top_level_response_cache(self):
method test_ensemble_all_models_cache_enabled (line 216) | def test_ensemble_all_models_cache_enabled(self):
method test_ensemble_composing_model_cache_enabled (line 254) | def test_ensemble_composing_model_cache_enabled(self):
method test_ensemble_cache_insertion_failure (line 285) | def test_ensemble_cache_insertion_failure(self):
method tearDown (line 320) | def tearDown(self):
FILE: qa/L0_response_cache/generate_random_data.py
function generate_input_data (line 34) | def generate_input_data(num_inputs, batch_size, output_file):
FILE: qa/L0_response_statistics/response_statistics_test.py
class TestResponseStatistics (line 37) | class TestResponseStatistics(unittest.TestCase):
method setUp (line 38) | def setUp(self):
method _generate_streaming_callback_and_response_pair (line 52) | def _generate_streaming_callback_and_response_pair(self):
method _stream_infer (line 66) | def _stream_infer(self, number_of_responses, cancel_at_response_size=N...
method _update_statistics_counts (line 94) | def _update_statistics_counts(
method _check_statistics_count_and_duration (line 132) | def _check_statistics_count_and_duration(
method _get_response_statistics (line 157) | def _get_response_statistics(self):
method _check_response_stats (line 191) | def _check_response_stats(
method test_response_statistics (line 212) | def test_response_statistics(self):
method test_response_statistics_cancel (line 234) | def test_response_statistics_cancel(self):
FILE: qa/L0_sagemaker/sagemaker_generate_stream_test.py
class SageMakerGenerateStreamTest (line 42) | class SageMakerGenerateStreamTest(tu.TestResultCollector):
method setUp (line 43) | def setUp(self):
method generate_stream (line 47) | def generate_stream(self, inputs, stream=False):
method generate_stream_expect_success (line 60) | def generate_stream_expect_success(self, inputs, expected_output, rep_...
method check_sse_responses (line 65) | def check_sse_responses(self, res, expected_res):
method test_generate_stream (line 94) | def test_generate_stream(self):
FILE: qa/L0_sagemaker/sagemaker_generate_test.py
class SageMakerGenerateTest (line 41) | class SageMakerGenerateTest(tu.TestResultCollector):
method setUp (line 42) | def setUp(self):
method generate (line 46) | def generate(self, inputs):
method test_generate (line 51) | def test_generate(self):
FILE: qa/L0_sagemaker/sagemaker_multi_model_test.py
class SageMakerMultiModelTest (line 44) | class SageMakerMultiModelTest(tu.TestResultCollector):
method setUp (line 45) | def setUp(self):
method test_sm_0_environment_variables_set (line 121) | def test_sm_0_environment_variables_set(self):
method test_sm_1_model_load (line 128) | def test_sm_1_model_load(self):
method test_sm_2_model_list (line 162) | def test_sm_2_model_list(self):
method test_sm_3_model_get (line 199) | def test_sm_3_model_get(self):
method test_sm_4_model_invoke (line 213) | def test_sm_4_model_invoke(self):
method test_sm_5_model_unload (line 292) | def test_sm_5_model_unload(self):
method test_sm_6_ensemble_model (line 323) | def test_sm_6_ensemble_model(self):
FILE: qa/L0_sagemaker/sagemaker_request_many_chunks.py
class SagemakerRequestManyChunksTest (line 32) | class SagemakerRequestManyChunksTest(unittest.TestCase):
method setUp (line 33) | def setUp(self):
method send_chunked_request (line 40) | def send_chunked_request(
method test_load_model (line 79) | def test_load_model(self):
FILE: qa/L0_sagemaker/sagemaker_test.py
class SageMakerTest (line 43) | class SageMakerTest(tu.TestResultCollector):
method setUp (line 44) | def setUp(self):
method test_direct_inference (line 87) | def test_direct_inference(self):
method test_inference_client_generated_request (line 116) | def test_inference_client_generated_request(self):
method test_inference_client_generated_request_binary (line 146) | def test_inference_client_generated_request_binary(self):
method test_inference_client_generated_response (line 183) | def test_inference_client_generated_response(self):
method test_inference_client_generated_response_binary (line 213) | def test_inference_client_generated_response_binary(self):
method test_malformed_binary_header (line 249) | def test_malformed_binary_header(self):
method test_malformed_binary_header_not_number (line 284) | def test_malformed_binary_header_not_number(self):
method test_malformed_binary_header_negative_number (line 319) | def test_malformed_binary_header_negative_number(self):
method test_malformed_binary_header_large_number (line 352) | def test_malformed_binary_header_large_number(self):
FILE: qa/L0_scalar_io/scalar_test.py
class ScalarIOTest (line 42) | class ScalarIOTest(tu.TestResultCollector):
method setUp (line 43) | def setUp(self):
method _send_request_and_verify_result (line 47) | def _send_request_and_verify_result(self, input, model_name):
method test_scalar_io (line 57) | def test_scalar_io(self):
FILE: qa/L0_sdk/grpc_test.cc
function main (line 33) | int
FILE: qa/L0_sdk/http_test.cc
function main (line 33) | int
FILE: qa/L0_sequence_batcher/sequence_batcher_test.py
function is_ensemble (line 115) | def is_ensemble(model_name):
class SequenceBatcherTest (line 122) | class SequenceBatcherTest(su.SequenceBatcherTestUtil):
method get_datatype (line 123) | def get_datatype(self, trial):
method get_expected_result (line 140) | def get_expected_result(self, expected_result, value, trial, flag_str=...
method get_expected_result_implicit (line 155) | def get_expected_result_implicit(
method test_simple_sequence (line 169) | def test_simple_sequence(self):
method test_length1_sequence (line 235) | def test_length1_sequence(self):
method test_batch_size (line 291) | def test_batch_size(self):
method test_no_correlation_id (line 374) | def test_no_correlation_id(self):
method test_no_sequence_start (line 446) | def test_no_sequence_start(self):
method test_no_sequence_start2 (line 526) | def test_no_sequence_start2(self):
method test_no_sequence_end (line 610) | def test_no_sequence_end(self):
method test_half_batch (line 671) | def test_half_batch(self):
method test_skip_batch (line 795) | def test_skip_batch(self):
method test_full_batch (line 983) | def test_full_batch(self):
method test_ragged_batch (line 1166) | def test_ragged_batch(self):
method test_ragged_batch_allowed (line 1365) | def test_ragged_batch_allowed(self):
method test_backlog (line 1561) | def test_backlog(self):
method test_backlog_fill (line 1776) | def test_backlog_fill(self):
method test_backlog_fill_no_end (line 2013) | def test_backlog_fill_no_end(self):
method test_backlog_same_correlation_id (line 2262) | def test_backlog_same_correlation_id(self):
method test_backlog_same_correlation_id_no_end (line 2480) | def test_backlog_same_correlation_id_no_end(self):
method test_backlog_sequence_timeout (line 2699) | def test_backlog_sequence_timeout(self):
method test_queue_delay_no_min_util (line 2938) | def test_queue_delay_no_min_util(self):
method test_queue_delay_half_min_util (line 3049) | def test_queue_delay_half_min_util(self):
method test_queue_delay_full_min_util (line 3160) | def test_queue_delay_full_min_util(self):
class SequenceBatcherRequestTimeoutTest (line 3272) | class SequenceBatcherRequestTimeoutTest(su.SequenceBatcherTestUtil):
method setUp (line 3273) | def setUp(self):
method send_sequence_with_timeout (line 3294) | def send_sequence_with_timeout(
method test_request_timeout (line 3321) | def test_request_timeout(self):
method test_send_request_after_timeout (line 3380) | def test_send_request_after_timeout(self):
class SequenceBatcherPreserveOrderingTest (line 3432) | class SequenceBatcherPreserveOrderingTest(su.SequenceBatcherTestUtil):
method setUp (line 3433) | def setUp(self):
method send_sequence (line 3454) | def send_sequence(self, seq_id, seq_id_map, req_id_map):
method _test_sequence_ordering (line 3484) | def _test_sequence_ordering(self, preserve_ordering, decoupled):
method test_sequence_with_preserve_ordering (line 3592) | def test_sequence_with_preserve_ordering(self):
method test_sequence_without_preserve_ordering (line 3596) | def test_sequence_without_preserve_ordering(self):
FILE: qa/L0_sequence_corrid_batcher/sequence_corrid_batcher_test.py
class SequenceCorrIDBatcherTest (line 59) | class SequenceCorrIDBatcherTest(su.SequenceBatcherTestUtil):
method get_datatype (line 60) | def get_datatype(self, trial):
method get_expected_result (line 63) | def get_expected_result(self, expected_result, corrid, value, trial, f...
method data_type_to_string (line 81) | def data_type_to_string(self, dtype):
method test_skip_batch (line 87) | def test_skip_batch(self):
method test_corrid_data_type (line 231) | def test_corrid_data_type(self):
FILE: qa/L0_sequence_stress/sequence_stress.py
class UserData (line 60) | class UserData:
method __init__ (line 61) | def __init__(self):
function completion_callback (line 66) | def completion_callback(user_data, result, error):
class TimeoutException (line 71) | class TimeoutException(Exception):
function check_sequence_async (line 75) | def check_sequence_async(
function get_datatype (line 172) | def get_datatype(trial):
function sequence_valid (line 179) | def sequence_valid(
function sequence_valid_valid (line 212) | def sequence_valid_valid(
function sequence_valid_no_end (line 258) | def sequence_valid_no_end(
function sequence_no_start (line 304) | def sequence_no_start(client_metadata, rng, trial, model_name, dtype, se...
function sequence_no_end (line 341) | def sequence_no_end(
function stress_thread (line 374) | def stress_thread(name, seed, pass_cnt, correlation_id_base, trial, mode...
function check_status (line 551) | def check_status(model_name):
FILE: qa/L0_server_status/server_status_test.py
class ServerMetadataTest (line 44) | class ServerMetadataTest(tu.TestResultCollector):
method test_basic (line 45) | def test_basic(self):
method test_unknown_model (line 95) | def test_unknown_model(self):
method test_unknown_model_version (line 129) | def test_unknown_model_version(self):
method test_model_latest_infer (line 156) | def test_model_latest_infer(self):
method test_model_specific_infer (line 314) | def test_model_specific_infer(self):
class ModelMetadataTest (line 431) | class ModelMetadataTest(tu.TestResultCollector):
method test_model_versions_deleted (line 437) | def test_model_versions_deleted(self):
method test_model_versions_added (line 512) | def test_model_versions_added(self):
method test_infer_stats_no_model_version (line 613) | def test_infer_stats_no_model_version(self):
method test_infer_stats_no_model (line 702) | def test_infer_stats_no_model(self):
FILE: qa/L0_shared_memory/shared_memory_test.py
class SystemSharedMemoryTestBase (line 48) | class SystemSharedMemoryTestBase(tu.TestResultCollector):
method setUp (line 52) | def setUp(self):
method tearDown (line 56) | def tearDown(self):
method _setup_client (line 59) | def _setup_client(self):
method _configure_server (line 72) | def _configure_server(
method _cleanup_shm_handles (line 141) | def _cleanup_shm_handles(self):
class SharedMemoryTest (line 147) | class SharedMemoryTest(SystemSharedMemoryTestBase):
method test_invalid_create_shm (line 148) | def test_invalid_create_shm(self):
method test_valid_create_set_register (line 156) | def test_valid_create_set_register(self):
method test_unregister_before_register (line 170) | def test_unregister_before_register(self):
method test_unregister_after_register (line 181) | def test_unregister_after_register(self):
method test_reregister_after_register (line 193) | def test_reregister_after_register(self):
method test_unregister_after_inference (line 212) | def test_unregister_after_inference(self):
method test_register_after_inference (line 237) | def test_register_after_inference(self):
method test_too_big_shm (line 270) | def test_too_big_shm(self):
method test_large_shm_register_offset (line 302) | def test_large_shm_register_offset(self):
method test_mixed_raw_shm (line 341) | def test_mixed_raw_shm(self):
method test_unregisterall (line 362) | def test_unregisterall(self):
method test_infer_offset_out_of_bound (line 378) | def test_infer_offset_out_of_bound(self):
method test_infer_byte_size_out_of_bound (line 413) | def test_infer_byte_size_out_of_bound(self):
method test_infer_integer_overflow (line 444) | def test_infer_integer_overflow(self):
method test_register_out_of_bound (line 497) | def test_register_out_of_bound(self):
method test_python_client_leak (line 542) | def test_python_client_leak(self):
method test_register_reserved_names (line 568) | def test_register_reserved_names(self):
method test_register_invalid_shm_key (line 595) | def test_register_invalid_shm_key(self):
function callback (line 616) | def callback(user_data, result, error):
class TestSharedMemoryUnregister (line 623) | class TestSharedMemoryUnregister(SystemSharedMemoryTestBase):
method _create_request_data (line 624) | def _create_request_data(self):
method _test_unregister_shm_request_pass (line 654) | def _test_unregister_shm_request_pass(self):
method _test_shm_not_found (line 667) | def _test_shm_not_found(self):
method _test_shm_found (line 678) | def _test_shm_found(self):
method test_unregister_shm_during_inference_single_req_http (line 687) | def test_unregister_shm_during_inference_single_req_http(self):
method test_unregister_shm_during_inference_multiple_req_http (line 706) | def test_unregister_shm_during_inference_multiple_req_http(self):
method test_unregister_shm_after_inference_http (line 739) | def test_unregister_shm_after_inference_http(self):
method test_unregister_shm_during_inference_single_req_grpc (line 762) | def test_unregister_shm_during_inference_single_req_grpc(self):
method test_unregister_shm_during_inference_multiple_req_grpc (line 789) | def test_unregister_shm_during_inference_multiple_req_grpc(self):
method test_unregister_shm_after_inference_grpc (line 837) | def test_unregister_shm_after_inference_grpc(self):
FILE: qa/L0_simple_ensemble/backpressure_test_models/decoupled_producer/1/model.py
class TritonPythonModel (line 34) | class TritonPythonModel:
method execute (line 39) | def execute(self, requests):
FILE: qa/L0_simple_ensemble/ensemble_backpressure_test.py
class UserData (line 53) | class UserData:
method __init__ (line 54) | def __init__(self):
function callback (line 58) | def callback(user_data, result, error):
function prepare_infer_args (line 65) | def prepare_infer_args(input_value):
function collect_responses (line 76) | def collect_responses(user_data):
class EnsembleBackpressureTest (line 108) | class EnsembleBackpressureTest(tu.TestResultCollector):
method _run_inference (line 113) | def _run_inference(self, model_name, expected_responses_count=32):
method test_max_inflight_requests_limit_4 (line 151) | def test_max_inflight_requests_limit_4(self):
method test_max_inflight_requests_limit_1 (line 158) | def test_max_inflight_requests_limit_1(self):
method test_max_inflight_requests_limit_disabled (line 164) | def test_max_inflight_requests_limit_disabled(self):
method test_max_inflight_requests_limit_concurrent_requests (line 170) | def test_max_inflight_requests_limit_concurrent_requests(self):
method test_max_inflight_requests_limit_request_cancellation (line 221) | def test_max_inflight_requests_limit_request_cancellation(self):
class EnsembleStepMaxQueueSizeTest (line 299) | class EnsembleStepMaxQueueSizeTest(tu.TestResultCollector):
method _run_inference (line 300) | def _run_inference(self, model_name, expected_responses_count):
method _run_concurrent_inference (line 356) | def _run_concurrent_inference(self, model_name, expected_responses_cou...
method test_step1_max_queue_size (line 420) | def test_step1_max_queue_size(self):
method test_step2_max_queue_size (line 479) | def test_step2_max_queue_size(self):
FILE: qa/L0_simple_ensemble/ensemble_test.py
class RequestGenerator (line 51) | class RequestGenerator:
method __init__ (line 52) | def __init__(self, init_value, num_requests) -> None:
method __enter__ (line 57) | def __enter__(self):
method __iter__ (line 60) | def __iter__(self):
method __next__ (line 63) | def __next__(self) -> bytes:
class EnsembleTest (line 73) | class EnsembleTest(tu.TestResultCollector):
method _get_infer_count_per_version (line 74) | def _get_infer_count_per_version(self, model_name):
method test_ensemble_add_sub (line 99) | def test_ensemble_add_sub(self):
method test_ensemble_add_sub_one_output (line 112) | def test_ensemble_add_sub_one_output(self):
method test_ensemble_sequence_flags (line 135) | def test_ensemble_sequence_flags(self):
method test_ensemble_partial_add_sub (line 181) | def test_ensemble_partial_add_sub(self):
FILE: qa/L0_simple_ensemble/models/partial_add_sub/1/model.py
class TritonPythonModel (line 31) | class TritonPythonModel:
method execute (line 32) | def execute(self, requests):
FILE: qa/L0_storage_S3_local/mock_s3_service.py
class MockS3Service (line 34) | class MockS3Service:
method __init__ (line 38) | def __init__(self):
method __enter__ (line 80) | def __enter__(self):
method __exit__ (line 83) | def __exit__(self, exc_type, exc_val, exc_tb):
method TestPassed (line 88) | def TestPassed(self):
FILE: qa/L0_storage_swiftstack/infer_test.py
class InferTest (line 40) | class InferTest(tu.TestResultCollector):
method _full_exact (line 41) | def _full_exact(
method test_raw_fff (line 163) | def test_raw_fff(self):
method test_class_fff (line 173) | def test_class_fff(self):
FILE: qa/L0_string_io/string_client_test.py
class ClientStringTest (line 42) | class ClientStringTest(tu.TestResultCollector):
method _test_infer_unicode (line 43) | def _test_infer_unicode(self, model_name, client, input_):
method _test_infer_non_unicode (line 70) | def _test_infer_non_unicode(self, model_name, client, input_, binary_d...
method _test_unicode_bytes_dtype (line 102) | def _test_unicode_bytes_dtype(self, client, model_name, dtype="|S78"):
method _test_str_dtype (line 137) | def _test_str_dtype(self, client, model_name, dtype=np.object_):
method _test_bytes (line 144) | def _test_bytes(self, model_name):
method test_unicode_bytes (line 202) | def test_unicode_bytes(self):
FILE: qa/L0_trace/models/input_all_required/1/model.py
class TritonPythonModel (line 34) | class TritonPythonModel:
method initialize (line 35) | def initialize(self, args):
method execute (line 38) | def execute(self, requests):
FILE: qa/L0_trace/opentelemetry_unittest.py
function callback (line 51) | def callback(user_data, result, error):
function prepare_data (line 58) | def prepare_data(client, is_binary=True):
function send_bls_request (line 78) | def send_bls_request(model_name="simple", headers=None):
class UserData (line 86) | class UserData:
method __init__ (line 87) | def __init__(self):
class OpenTelemetryTest (line 91) | class OpenTelemetryTest(tu.TestResultCollector):
method setUp (line 92) | def setUp(self):
method tearDown (line 137) | def tearDown(self):
method _get_inputs (line 144) | def _get_inputs(self, batch_size):
method _generate_callback_and_response_pair (line 150) | def _generate_callback_and_response_pair(self):
method _parse_trace_log (line 160) | def _parse_trace_log(self, trace_log):
method _check_events (line 178) | def _check_events(self, span_name, events, is_cancelled):
method _test_resource_attributes (line 283) | def _test_resource_attributes(self, attributes):
method _verify_contents (line 316) | def _verify_contents(self, spans, expected_counts, is_cancelled):
method _verify_nesting (line 355) | def _verify_nesting(self, spans, expect
Copy disabled (too large)
Download .json
Condensed preview — 1631 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (10,268K chars).
[
{
"path": ".clang-format",
"chars": 791,
"preview": "---\nBasedOnStyle: Google\n\nIndentWidth: 2\nColumnLimit: 80\nContinuationIndentWidth: 4\nUseTab: Never\nMaxEmptyLinesToKeep: 2"
},
{
"path": ".dockerignore",
"chars": 6,
"preview": ".git*\n"
},
{
"path": ".github/ISSUE_TEMPLATE/bug_report.md",
"chars": 612,
"preview": "---\nname: Bug report\nabout: Create a report to help us improve\ntitle: ''\nlabels: ''\nassignees: ''\n\n---\n\n**Description**\n"
},
{
"path": ".github/ISSUE_TEMPLATE/feature_request.md",
"chars": 595,
"preview": "---\nname: Feature request\nabout: Suggest an idea for this project\ntitle: ''\nlabels: ''\nassignees: ''\n\n---\n\n**Is your fea"
},
{
"path": ".github/PULL_REQUEST_TEMPLATE/pull_request_template_external_contrib.md",
"chars": 2034,
"preview": "#### What does the PR do?\n<!-- Describe your pull request here. Please read the text below the line, and make sure you f"
},
{
"path": ".github/PULL_REQUEST_TEMPLATE/pull_request_template_internal_contrib.md",
"chars": 1812,
"preview": "#### What does the PR do?\n<!-- Describe your pull request here. Please read the text below the line, and make sure you f"
},
{
"path": ".github/pull_request_template.md",
"chars": 814,
"preview": "Thanks for submitting a PR to Triton!\nPlease go the the `Preview` tab above this description box and select the appropri"
},
{
"path": ".github/workflows/codeql.yml",
"chars": 3685,
"preview": "# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source and binary "
},
{
"path": ".github/workflows/pre-commit.yml",
"chars": 2038,
"preview": "# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source and bi"
},
{
"path": ".gitignore",
"chars": 220,
"preview": "/build\n/builddir\n/.vscode\n*.so\n__pycache__\ntmp\n*.log\n*.xml\ntest_results.txt\nartifacts\ncprofile\n*.prof\n.venv\n**/.venv\n\n# "
},
{
"path": ".pre-commit-config.yaml",
"chars": 3193,
"preview": "# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source and bi"
},
{
"path": "CITATION.cff",
"chars": 320,
"preview": "cff-version: 1.2.0\nmessage: \"If you use this software, please cite it as below.\"\ntitle: \"Triton Inference Server: An Opt"
},
{
"path": "CMakeLists.txt",
"chars": 12836,
"preview": "# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source and bi"
},
{
"path": "CONTRIBUTING.md",
"chars": 5892,
"preview": "<!--\n# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source a"
},
{
"path": "Dockerfile.QA",
"chars": 19442,
"preview": "# Copyright 2018-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source and bi"
},
{
"path": "Dockerfile.sdk",
"chars": 9270,
"preview": "# Copyright 2019-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source and bi"
},
{
"path": "Dockerfile.win10.min",
"chars": 8832,
"preview": "# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source and bi"
},
{
"path": "LICENSE",
"chars": 1490,
"preview": "Copyright (c) 2018-2026, NVIDIA CORPORATION. All rights reserved.\n\nRedistribution and use in source and binary forms, wi"
},
{
"path": "README.md",
"chars": 14816,
"preview": "<!--\n# Copyright 2018-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source a"
},
{
"path": "SECURITY.md",
"chars": 2570,
"preview": "<!--\n# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source and bi"
},
{
"path": "TRITON_VERSION",
"chars": 10,
"preview": "2.67.0dev\n"
},
{
"path": "build.py",
"chars": 113534,
"preview": "#!/usr/bin/env python3\n# Copyright 2020-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution a"
},
{
"path": "compose.py",
"chars": 17415,
"preview": "#!/usr/bin/env python3\n# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution a"
},
{
"path": "deploy/alibaba-cloud/README.md",
"chars": 8531,
"preview": "<!--\n# Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in sour"
},
{
"path": "deploy/aws/Chart.yaml",
"chars": 1654,
"preview": "# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "deploy/aws/README.md",
"chars": 10073,
"preview": "<!--\n# Copyright (c) 2018-2025, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary"
},
{
"path": "deploy/aws/dashboard.json",
"chars": 8583,
"preview": "{\n \"__inputs\": [\n {\n \"name\": \"DS_PROMETHEUS\",\n \"label\": \"Prometheus\",\n \"description\": \"\",\n \"type"
},
{
"path": "deploy/aws/templates/_helpers.tpl",
"chars": 3918,
"preview": "{{/*\n# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary"
},
{
"path": "deploy/aws/templates/deployment.yaml",
"chars": 3705,
"preview": "# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "deploy/aws/templates/secrets.yaml",
"chars": 1765,
"preview": "# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "deploy/aws/templates/service.yaml",
"chars": 3419,
"preview": "# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "deploy/aws/values.yaml",
"chars": 1848,
"preview": "# Copyright (c) 2019-2026, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "deploy/fleetcommand/Chart.yaml",
"chars": 2178,
"preview": "# Copyright (c) 2019-2026, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "deploy/fleetcommand/README.md",
"chars": 6277,
"preview": "<!--\n# Copyright (c) 2018-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in sour"
},
{
"path": "deploy/fleetcommand/dashboard.json",
"chars": 8740,
"preview": "{\n \"__requires\": [\n {\n \"type\": \"grafana\",\n \"id\": \"grafana\",\n \"name\": \"Grafana\",\n \"version\": \"6.3"
},
{
"path": "deploy/fleetcommand/templates/_helpers.tpl",
"chars": 3918,
"preview": "{{/*\n# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary"
},
{
"path": "deploy/fleetcommand/templates/configmap-grafana-dashboard.yaml",
"chars": 1792,
"preview": "# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source and binary "
},
{
"path": "deploy/fleetcommand/templates/deployment.yaml",
"chars": 4029,
"preview": "# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "deploy/fleetcommand/templates/secrets.yaml",
"chars": 1891,
"preview": "# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "deploy/fleetcommand/templates/service.yaml",
"chars": 3816,
"preview": "# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "deploy/fleetcommand/values.yaml",
"chars": 3218,
"preview": "# Copyright (c) 2019-2026, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "deploy/gcp/Chart.yaml",
"chars": 1654,
"preview": "# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "deploy/gcp/README.md",
"chars": 11085,
"preview": "<!--\n# Copyright (c) 2018-2025, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary"
},
{
"path": "deploy/gcp/dashboard.json",
"chars": 8583,
"preview": "{\n \"__inputs\": [\n {\n \"name\": \"DS_PROMETHEUS\",\n \"label\": \"Prometheus\",\n \"description\": \"\",\n \"type"
},
{
"path": "deploy/gcp/templates/_helpers.tpl",
"chars": 3918,
"preview": "{{/*\n# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary"
},
{
"path": "deploy/gcp/templates/deployment.yaml",
"chars": 3096,
"preview": "# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "deploy/gcp/templates/service.yaml",
"chars": 3419,
"preview": "# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "deploy/gcp/values.yaml",
"chars": 1765,
"preview": "# Copyright (c) 2019-2026, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "deploy/gke-marketplace-app/README.md",
"chars": 13921,
"preview": "<!--\n# Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in sour"
},
{
"path": "deploy/gke-marketplace-app/benchmark/README.md",
"chars": 5332,
"preview": "<!--\n# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in sour"
},
{
"path": "deploy/gke-marketplace-app/benchmark/model-store/bert_base_tf_cpu/config.pbtxt",
"chars": 1703,
"preview": "# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source and bin"
},
{
"path": "deploy/gke-marketplace-app/benchmark/model-store/bert_base_tf_gpu/config.pbtxt",
"chars": 1707,
"preview": "# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source an"
},
{
"path": "deploy/gke-marketplace-app/benchmark/model-store/bert_base_trt_gpu/config.pbtxt",
"chars": 1750,
"preview": "# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source an"
},
{
"path": "deploy/gke-marketplace-app/benchmark/model-store/bert_base_trt_gpu_seqlen128/config.pbtxt",
"chars": 1729,
"preview": "# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source and bin"
},
{
"path": "deploy/gke-marketplace-app/benchmark/model-store/bert_distill_tf_cpu/config.pbtxt",
"chars": 1708,
"preview": "# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source an"
},
{
"path": "deploy/gke-marketplace-app/benchmark/model-store/bert_distill_tf_gpu/config.pbtxt",
"chars": 1707,
"preview": "# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source an"
},
{
"path": "deploy/gke-marketplace-app/benchmark/perf-analyzer-script/perf_query.sh",
"chars": 2345,
"preview": "#!/usr/bin/env bash\n# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and u"
},
{
"path": "deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml",
"chars": 1925,
"preview": "# Copyright 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source and bi"
},
{
"path": "deploy/gke-marketplace-app/client-sample/bert_request.json",
"chars": 1990,
"preview": "{\n \"inputs\": [{\n \"name\": \"input_ids\",\n \"shape\": [1, 128],\n \"datatype\": \"INT32\",\n \"parameters\": {},\n \"dat"
},
{
"path": "deploy/gke-marketplace-app/client-sample/locustfile_bert.py",
"chars": 2784,
"preview": "#!/usr/bin/env python3\n\n# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution "
},
{
"path": "deploy/gke-marketplace-app/client-sample/perf_analyzer_grpc.sh",
"chars": 2466,
"preview": "#!/usr/bin/env bash\n# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and u"
},
{
"path": "deploy/gke-marketplace-app/server-deployer/Dockerfile",
"chars": 1610,
"preview": "# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source and bin"
},
{
"path": "deploy/gke-marketplace-app/server-deployer/build_and_push.sh",
"chars": 2517,
"preview": "#!/bin/bash\n# Copyright 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in s"
},
{
"path": "deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml",
"chars": 1669,
"preview": "# Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source an"
},
{
"path": "deploy/gke-marketplace-app/server-deployer/chart/triton/templates/_helpers.tpl",
"chars": 2650,
"preview": "{{/*\n# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source an"
},
{
"path": "deploy/gke-marketplace-app/server-deployer/chart/triton/templates/application.yaml",
"chars": 2948,
"preview": "# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source an"
},
{
"path": "deploy/gke-marketplace-app/server-deployer/chart/triton/templates/deployment.yaml",
"chars": 4262,
"preview": "# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source an"
},
{
"path": "deploy/gke-marketplace-app/server-deployer/chart/triton/templates/hpa.yaml",
"chars": 2140,
"preview": "# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source and bin"
},
{
"path": "deploy/gke-marketplace-app/server-deployer/chart/triton/templates/ingress.yaml",
"chars": 2084,
"preview": "# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source and bin"
},
{
"path": "deploy/gke-marketplace-app/server-deployer/chart/triton/templates/service.yaml",
"chars": 2325,
"preview": "# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source an"
},
{
"path": "deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml",
"chars": 2503,
"preview": "# Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source an"
},
{
"path": "deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml",
"chars": 5723,
"preview": "# Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source an"
},
{
"path": "deploy/gke-marketplace-app/server-deployer/schema.yaml",
"chars": 5720,
"preview": "# Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source an"
},
{
"path": "deploy/gke-marketplace-app/trt-engine/README.md",
"chars": 3097,
"preview": "<!--\n# Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in sour"
},
{
"path": "deploy/k8s-onprem/Chart.yaml",
"chars": 1943,
"preview": "# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "deploy/k8s-onprem/README.md",
"chars": 15735,
"preview": "<!--\n# Copyright (c) 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in sour"
},
{
"path": "deploy/k8s-onprem/dashboard.json",
"chars": 27650,
"preview": "{\n \"__inputs\": [\n {\n \"name\": \"DS_PROMETHEUS\",\n \"label\": \"Prometheus\",\n \"description\": \"\",\n \"type"
},
{
"path": "deploy/k8s-onprem/templates/_helpers.tpl",
"chars": 4718,
"preview": "{{/*\n# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary"
},
{
"path": "deploy/k8s-onprem/templates/deployment.yaml",
"chars": 4299,
"preview": "# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "deploy/k8s-onprem/templates/hpa.yaml",
"chars": 2548,
"preview": "# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "deploy/k8s-onprem/templates/ingressroute.yaml",
"chars": 2956,
"preview": "# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "deploy/k8s-onprem/templates/rbac.yaml",
"chars": 4001,
"preview": "# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "deploy/k8s-onprem/templates/service.yaml",
"chars": 3533,
"preview": "# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "deploy/k8s-onprem/templates/serviceaccount.yaml",
"chars": 1952,
"preview": "# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "deploy/k8s-onprem/values.yaml",
"chars": 3034,
"preview": "# Copyright (c) 2019-2026, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "deploy/mlflow-triton-plugin/README.md",
"chars": 8884,
"preview": "<!--\n# Copyright 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source a"
},
{
"path": "deploy/mlflow-triton-plugin/examples/expected_output.json",
"chars": 166,
"preview": "{\"outputs\":\n {\n \"OUTPUT0\": [[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]],\n \"OUTPUT1\": [[0, 0, 0,"
},
{
"path": "deploy/mlflow-triton-plugin/examples/input.json",
"chars": 229,
"preview": "{\"inputs\":\n {\n \"INPUT0\": [[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]],\n"
},
{
"path": "deploy/mlflow-triton-plugin/examples/onnx_float32_int32_int32/config.pbtxt",
"chars": 1962,
"preview": "\n# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source and b"
},
{
"path": "deploy/mlflow-triton-plugin/mlflow_triton/__init__.py",
"chars": 1571,
"preview": "#!/usr/bin/env python3\n\n# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution "
},
{
"path": "deploy/mlflow-triton-plugin/mlflow_triton/config.py",
"chars": 5614,
"preview": "#!/usr/bin/env python3\n\n# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution "
},
{
"path": "deploy/mlflow-triton-plugin/mlflow_triton/deployments.py",
"chars": 22354,
"preview": "#!/usr/bin/env python3\n\n# Copyright 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution "
},
{
"path": "deploy/mlflow-triton-plugin/scripts/publish_model_to_mlflow.py",
"chars": 2813,
"preview": "#!/usr/bin/env python3\n\n# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution "
},
{
"path": "deploy/mlflow-triton-plugin/scripts/triton_flavor.py",
"chars": 4378,
"preview": "#!/usr/bin/env python3\n\n# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution "
},
{
"path": "deploy/mlflow-triton-plugin/setup.py",
"chars": 1996,
"preview": "#!/usr/bin/env python3\n\n# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution "
},
{
"path": "deploy/oci/Chart.yaml",
"chars": 1654,
"preview": "# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "deploy/oci/README.md",
"chars": 12625,
"preview": "<!--\n# Copyright (c) 2018-2025, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary"
},
{
"path": "deploy/oci/dashboard.json",
"chars": 8583,
"preview": "{\n \"__inputs\": [\n {\n \"name\": \"DS_PROMETHEUS\",\n \"label\": \"Prometheus\",\n \"description\": \"\",\n \"type"
},
{
"path": "deploy/oci/templates/_helpers.tpl",
"chars": 3918,
"preview": "{{/*\n# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary"
},
{
"path": "deploy/oci/templates/deployment.yaml",
"chars": 3705,
"preview": "# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "deploy/oci/templates/secrets.yaml",
"chars": 1765,
"preview": "# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "deploy/oci/templates/service.yaml",
"chars": 3419,
"preview": "# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "deploy/oci/values.yaml",
"chars": 1909,
"preview": "# Copyright (c) 2019-2026, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "docker/README.third-party-src",
"chars": 163,
"preview": "This directory contains the licenses and source code for software\nincluded in the Triton Inference Server build. To extr"
},
{
"path": "docker/cpu_only/entrypoint.d/12-banner.sh",
"chars": 524,
"preview": "#!/bin/bash\n# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n\nprodname_uc=$(echo \"${NVIDIA_P"
},
{
"path": "docker/cpu_only/entrypoint.d/50-gpu-driver-check2.sh",
"chars": 121,
"preview": "#!/bin/bash\n# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n\nexport TRITON_SERVER_CPU_ONLY=1"
},
{
"path": "docker/cpu_only/nvidia_entrypoint.sh",
"chars": 2114,
"preview": "#!/bin/bash\n# Copyright 2016-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in s"
},
{
"path": "docker/entrypoint.d/10-banner.txt",
"chars": 91,
"preview": "\n=============================\n== Triton Inference Server ==\n=============================\n"
},
{
"path": "docker/entrypoint.d/15-container-copyright.txt",
"chars": 81,
"preview": "\nCopyright (c) 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n"
},
{
"path": "docker/entrypoint.d/50-gpu-driver-check2.sh",
"chars": 172,
"preview": "#!/bin/bash\n# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n\nif [[ \"${NVIDIA_CPU_ONLY:-0}\" ="
},
{
"path": "docker/entrypoint.d/56-network-driver-version-check.sh",
"chars": 1,
"preview": "\n"
},
{
"path": "docker/entrypoint.d/70-shm-check.sh",
"chars": 1,
"preview": "\n"
},
{
"path": "docker/entrypoint.d/99-check-run-aip-mode.sh",
"chars": 387,
"preview": "#!/bin/bash\n# Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n\n# If detect Vertex AI envi"
},
{
"path": "docker/sagemaker/serve",
"chars": 8847,
"preview": "#!/bin/bash\n# Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use "
},
{
"path": "docs/Dockerfile.docs",
"chars": 3306,
"preview": "# Copyright 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source and bi"
},
{
"path": "docs/Makefile",
"chars": 2530,
"preview": "# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source and bi"
},
{
"path": "docs/README.md",
"chars": 18633,
"preview": "<!--\n# Copyright 2018-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source a"
},
{
"path": "docs/_reference/tritonclient_api.rst",
"chars": 1928,
"preview": "..\n # Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n #\n # Redistribution and use in source an"
},
{
"path": "docs/_static/.gitattributes",
"chars": 153,
"preview": "nvidia-logo-horiz-rgb-blk-for-screen.png filter=lfs diff=lfs merge=lfs -text\nnvidia-logo-vert-rgb-blk-for-screen.png fil"
},
{
"path": "docs/_static/custom.css",
"chars": 10016,
"preview": "/*\n# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source and"
},
{
"path": "docs/_static/rtd-data.js",
"chars": 1815,
"preview": "/*\n# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source and bina"
},
{
"path": "docs/_templates/layout.html",
"chars": 1681,
"preview": "<!--\n# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source and bi"
},
{
"path": "docs/backend_guide/vllm.rst",
"chars": 1753,
"preview": "..\n.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n..\n.. Redistribution and use in source "
},
{
"path": "docs/client_guide/api_reference.rst",
"chars": 1685,
"preview": "..\n.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n..\n.. Redistribution and use in source "
},
{
"path": "docs/client_guide/in_process.rst",
"chars": 3566,
"preview": "..\n.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n..\n.. Redistribution and use in source "
},
{
"path": "docs/client_guide/kserve.rst",
"chars": 1932,
"preview": "..\n.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n..\n.. Redistribution and use in source "
},
{
"path": "docs/client_guide/kserve_extension.rst",
"chars": 2565,
"preview": "..\n.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n..\n.. Redistribution and use in source "
},
{
"path": "docs/client_guide/python.rst",
"chars": 1888,
"preview": "..\n.. Copyright 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n..\n.. Redistribution and use in source "
},
{
"path": "docs/conf.py",
"chars": 13158,
"preview": "#!/usr/bin/env python3\n\n# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution "
},
{
"path": "docs/contents.rst",
"chars": 4262,
"preview": "..\n.. Copyright 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n..\n.. Redistribution and use in source "
},
{
"path": "docs/customization_guide/build.md",
"chars": 23357,
"preview": "<!--\n# Copyright 2018-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source a"
},
{
"path": "docs/customization_guide/compose.md",
"chars": 6644,
"preview": "<!--\n# Copyright (c) 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in sour"
},
{
"path": "docs/customization_guide/deploy.md",
"chars": 13960,
"preview": "<!--\n# Copyright (c) 2020-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in sour"
},
{
"path": "docs/customization_guide/inference_protocols.md",
"chars": 11841,
"preview": "<!--\n# Copyright 2018-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source a"
},
{
"path": "docs/customization_guide/inprocess_c_api.md",
"chars": 11925,
"preview": "<!--\n# Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source a"
},
{
"path": "docs/customization_guide/inprocess_java_api.md",
"chars": 7813,
"preview": "<!--\n# Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source a"
},
{
"path": "docs/customization_guide/repository_agents.md",
"chars": 6974,
"preview": "<!--\n# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "docs/customization_guide/sagemaker.md",
"chars": 2165,
"preview": "<!--\n# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source and bi"
},
{
"path": "docs/customization_guide/test.md",
"chars": 5589,
"preview": "<!--\n# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source a"
},
{
"path": "docs/customization_guide/tritonfrontend.md",
"chars": 6033,
"preview": "<!--\n# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source and bi"
},
{
"path": "docs/examples/README.md",
"chars": 1953,
"preview": "<!--\n# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source a"
},
{
"path": "docs/examples/fetch_models.sh",
"chars": 2623,
"preview": "#!/bin/bash\n# Copyright (c) 2018-2025, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and"
},
{
"path": "docs/examples/jetson/README.md",
"chars": 3470,
"preview": "<!--\n# Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in sour"
},
{
"path": "docs/examples/jetson/concurrency_and_dynamic_batching/Makefile",
"chars": 2208,
"preview": "# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source an"
},
{
"path": "docs/examples/jetson/concurrency_and_dynamic_batching/README.md",
"chars": 12676,
"preview": "<!--\n# Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in sour"
},
{
"path": "docs/examples/jetson/concurrency_and_dynamic_batching/common.h",
"chars": 5128,
"preview": "// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n//\n// Redistribution and use in source and"
},
{
"path": "docs/examples/jetson/concurrency_and_dynamic_batching/labels.txt",
"chars": 17,
"preview": "person\nbag\nface\n\n"
},
{
"path": "docs/examples/jetson/concurrency_and_dynamic_batching/people_detection.cc",
"chars": 37211,
"preview": "// Copyright (c) 2021, NVIDIA CORPORATION& AFFILIATES.All rights reserved.\n//\n// Redistribution and use in source and bi"
},
{
"path": "docs/examples/jetson/concurrency_and_dynamic_batching/tao/convert_peoplenet.sh",
"chars": 1934,
"preview": "#!/bin/bash\n# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source"
},
{
"path": "docs/examples/jetson/concurrency_and_dynamic_batching/tao/models/peoplenet/.gitkeep",
"chars": 0,
"preview": ""
},
{
"path": "docs/examples/jetson/concurrency_and_dynamic_batching/trtis_model_repo_sample_1/peoplenet/1/.gitkeep",
"chars": 0,
"preview": ""
},
{
"path": "docs/examples/jetson/concurrency_and_dynamic_batching/trtis_model_repo_sample_1/peoplenet/config.pbtxt",
"chars": 402,
"preview": "name: \"peoplenet\"\nplatform: \"tensorrt_plan\"\nmax_batch_size: 64\ninput [\n {\n name: \"input_1\"\n data_type: TYPE_FP32\n"
},
{
"path": "docs/examples/jetson/concurrency_and_dynamic_batching/trtis_model_repo_sample_2/peoplenet/1/.gitkeep",
"chars": 0,
"preview": ""
},
{
"path": "docs/examples/jetson/concurrency_and_dynamic_batching/trtis_model_repo_sample_2/peoplenet/config.pbtxt",
"chars": 365,
"preview": "name: \"peoplenet\"\nplatform: \"tensorrt_plan\"\nmax_batch_size: 64\ninput [\n {\n name: \"input_1\"\n data_type: TYPE_FP32\n"
},
{
"path": "docs/examples/model_repository/densenet_onnx/config.pbtxt",
"chars": 387,
"preview": "name: \"densenet_onnx\"\nplatform: \"onnxruntime_onnx\"\nmax_batch_size : 0\ninput [\n {\n name: \"data_0\"\n data_type: TYPE"
},
{
"path": "docs/examples/model_repository/densenet_onnx/densenet_labels.txt",
"chars": 10311,
"preview": "TENCH\nGOLDFISH\nWHITE SHARK\nTIGER SHARK\nHAMMERHEAD SHARK\nELECTRIC RAY\nSTINGRAY\nROOSTER\nHEN\nOSTRICH\nBRAMBLING\nGOLDFINCH\nHO"
},
{
"path": "docs/examples/model_repository/inception_onnx/config.pbtxt",
"chars": 1945,
"preview": "# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary forms, wi"
},
{
"path": "docs/examples/model_repository/inception_onnx/inception_labels.txt",
"chars": 10329,
"preview": "UNUSED BACKGROUND\nTENCH\nGOLDFISH\nWHITE SHARK\nTIGER SHARK\nHAMMERHEAD SHARK\nELECTRIC RAY\nSTINGRAY\nROOSTER\nHEN\nOSTRICH\nBRAM"
},
{
"path": "docs/examples/model_repository/simple/config.pbtxt",
"chars": 1900,
"preview": "# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary forms, wi"
},
{
"path": "docs/examples/model_repository/simple_dyna_sequence/config.pbtxt",
"chars": 2708,
"preview": "# Copyright (c) 2020-2025, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "docs/examples/model_repository/simple_identity/1/model.py",
"chars": 2184,
"preview": "# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source and binary "
},
{
"path": "docs/examples/model_repository/simple_identity/config.pbtxt",
"chars": 1759,
"preview": "# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary forms, wi"
},
{
"path": "docs/examples/model_repository/simple_int8/config.pbtxt",
"chars": 1902,
"preview": "# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary forms, wi"
},
{
"path": "docs/examples/model_repository/simple_sequence/config.pbtxt",
"chars": 2163,
"preview": "# Copyright (c) 2020-2025, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "docs/examples/model_repository/simple_string/config.pbtxt",
"chars": 1912,
"preview": "# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary forms, wi"
},
{
"path": "docs/exclusions.txt",
"chars": 57,
"preview": "README.md\nexamples/README.md\nuser_guide/perf_analyzer.md\n"
},
{
"path": "docs/generate_docs.py",
"chars": 13531,
"preview": "#!/usr/bin/env python3\n\n# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution "
},
{
"path": "docs/getting_started/llm.md",
"chars": 49002,
"preview": "<!--\n# Copyright (c) 2024-2026, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary"
},
{
"path": "docs/getting_started/quick_deployment.rst",
"chars": 2033,
"preview": "..\n.. Copyright 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n..\n.. Redistribution and use in source "
},
{
"path": "docs/getting_started/quickstart.md",
"chars": 6528,
"preview": "<!--\n# Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary"
},
{
"path": "docs/getting_started/trtllm_user_guide.md",
"chars": 7134,
"preview": "<!--\n# Copyright 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source a"
},
{
"path": "docs/index.md",
"chars": 6614,
"preview": "<!--\n# Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source a"
},
{
"path": "docs/introduction/compatibility.md",
"chars": 9775,
"preview": "<!--\n# Copyright (c) 2024-2026, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary"
},
{
"path": "docs/introduction/index.md",
"chars": 6693,
"preview": "<!--\n# Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source a"
},
{
"path": "docs/introduction/release_notes.md",
"chars": 2035,
"preview": "<!--\n# Copyright (c) 2024-2026, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary"
},
{
"path": "docs/llm_features/speculative_decoding.rst",
"chars": 1873,
"preview": "..\n.. Copyright 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n..\n.. Redistribution and use in source "
},
{
"path": "docs/perf_benchmark/genai_perf.rst",
"chars": 2058,
"preview": "..\n.. Copyright 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n..\n.. Redistribution and use in source "
},
{
"path": "docs/perf_benchmark/model_analyzer.rst",
"chars": 2515,
"preview": "..\n.. Copyright 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n..\n.. Redistribution and use in source "
},
{
"path": "docs/perf_benchmark/perf_analyzer.rst",
"chars": 2143,
"preview": "..\n.. Copyright 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n..\n.. Redistribution and use in source "
},
{
"path": "docs/protocol/README.md",
"chars": 5676,
"preview": "<!--\n# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source a"
},
{
"path": "docs/protocol/extension_binary_data.md",
"chars": 7668,
"preview": "<!--\n# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source a"
},
{
"path": "docs/protocol/extension_classification.md",
"chars": 6333,
"preview": "<!--\n# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "docs/protocol/extension_generate.md",
"chars": 6929,
"preview": "<!--\n# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source a"
},
{
"path": "docs/protocol/extension_logging.md",
"chars": 8782,
"preview": "<!--\n# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in sour"
},
{
"path": "docs/protocol/extension_model_configuration.md",
"chars": 4158,
"preview": "<!--\n# Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary"
},
{
"path": "docs/protocol/extension_model_repository.md",
"chars": 12781,
"preview": "<!--\n# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source a"
},
{
"path": "docs/protocol/extension_parameters.md",
"chars": 4514,
"preview": "<!--\n# Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source a"
},
{
"path": "docs/protocol/extension_schedule_policy.md",
"chars": 4160,
"preview": "<!--\n# Copyright 2020-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source a"
},
{
"path": "docs/protocol/extension_sequence.md",
"chars": 5175,
"preview": "<!--\n# Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in sour"
},
{
"path": "docs/protocol/extension_shared_memory.md",
"chars": 18258,
"preview": "<!--\n# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.\n#\n# Redistribution and use in source and binary form"
},
{
"path": "docs/protocol/extension_statistics.md",
"chars": 20332,
"preview": "<!--\n# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source a"
},
{
"path": "docs/protocol/extension_trace.md",
"chars": 7655,
"preview": "<!--\n# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source an"
},
{
"path": "docs/repositories.txt",
"chars": 196,
"preview": "backend\nclient\ndali_backend\nfil_backend\nmodel_analyzer\nmodel_navigator\nonnxruntime_backend\nperf_analyzer\npython_backend\n"
},
{
"path": "docs/scaling_guide/scaling_guide.rst",
"chars": 1887,
"preview": "..\n.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n..\n.. Redistribution and use in source "
},
{
"path": "docs/server_guide/features.rst",
"chars": 2084,
"preview": "..\n.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n..\n.. Redistribution and use in source "
},
{
"path": "docs/server_guide/model_pipelines.rst",
"chars": 1779,
"preview": "..\n.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n..\n.. Redistribution and use in source "
},
{
"path": "docs/server_guide/state_management.rst",
"chars": 1761,
"preview": "..\n.. Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n..\n.. Redistribution and use in source "
},
{
"path": "docs/user_guide/architecture.md",
"chars": 3232,
"preview": "<!--\n# Copyright 2018-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source a"
},
{
"path": "docs/user_guide/batcher.md",
"chars": 14180,
"preview": "<!--\n# Copyright 2018-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source a"
},
{
"path": "docs/user_guide/bls.md",
"chars": 19859,
"preview": "<!--\n# Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n#\n# Redistribution and use in source a"
}
]
// ... and 1431 more files (download for full content)
About this extraction
This page contains the full source code of the triton-inference-server/server GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 1631 files (9.3 MB), approximately 2.5M tokens, and a symbol index with 3571 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.